Passed
Push — master ( 3870cb...2758b6 )
by Lars
04:05
created

UTF8   F

Complexity

Total Complexity 1719

Size/Duplication

Total Lines 12679
Duplicated Lines 0 %

Test Coverage

Coverage 79.71%

Importance

Changes 90
Bugs 51 Features 5
Metric Value
eloc 4373
c 90
b 51
f 5
dl 0
loc 12679
ccs 3076
cts 3859
cp 0.7971
rs 0.8
wmc 1719

298 Methods

Rating   Name   Duplication   Size   Complexity  
A str_dasherize() 0 3 1
B str_delimit() 0 33 8
A ctype_loaded() 0 3 1
A decode_mimeheader() 0 15 5
A css_stripe_media_queries() 0 6 1
A json_loaded() 0 3 1
A collapse_whitespace() 0 8 2
A max() 0 14 3
A normalize_line_ending() 0 3 1
A add_bom_to_string() 0 7 2
A chr_to_int() 0 3 1
A __construct() 0 2 1
B between() 0 48 8
A char_at() 0 7 2
A chars() 0 3 1
A access() 0 11 4
A callback() 0 3 1
A bom() 0 3 1
A first_char() 0 11 4
A finfo_loaded() 0 3 1
B str_to_lines() 0 29 8
A toUTF8() 0 3 1
A strchr() 0 8 1
A strichr() 0 8 1
A strstr_in_byte() 0 12 4
A toLatin1() 0 3 1
A str_upper_camelize() 0 8 1
A strcasecmp() 0 5 1
A str_transliterate() 0 3 1
A strrpos_in_byte() 0 12 4
A str_underscored() 0 3 1
A strip_whitespace() 0 7 2
A toAscii() 0 3 1
A str_upper_first() 0 8 1
A toIso8859() 0 3 1
A strripos_in_byte() 0 12 4
A chr_to_decimal() 0 30 6
A filter_input() 0 13 2
A array_change_key_case() 0 20 5
D chr() 0 101 18
A chunk_split() 0 3 1
A fix_utf8() 0 30 4
D getCharDirection() 0 105 118
A filter_var_array() 0 9 2
A chr_map() 0 5 1
A fits_inside() 0 3 1
A chr_size_list() 0 17 3
A filter_var() 0 9 2
A fix_simple_utf8() 0 19 4
A checkForSupport() 0 47 4
A filter_input_array() 0 9 2
A getSupportInfo() 0 13 3
A chr_to_hex() 0 11 3
A binary_to_str() 0 12 3
A file_has_bom() 0 8 2
A parse_str() 0 16 4
A get_unique_string() 0 15 2
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A encode_mimeheader() 0 25 5
A count_chars() 0 11 1
A has_uppercase() 0 8 2
A max_chr_width() 0 8 2
A isBinary() 0 3 1
A ltrim() 0 19 4
A emoji_decode() 0 16 2
D is_utf8() 0 144 31
A lcword() 0 8 1
A mbstring_loaded() 0 3 1
A html_escape() 0 6 1
C normalize_encoding() 0 134 14
C get_file_type() 0 96 15
C is_utf16() 0 65 16
A isHtml() 0 3 1
C filter() 0 56 13
A normalize_whitespace() 0 30 6
A isBase64() 0 3 1
A is_html() 0 14 2
A html_decode() 0 3 1
A isUtf32() 0 3 1
A emoji_encode() 0 16 2
A is_alpha() 0 8 2
B get_random_string() 0 53 10
A isUtf8() 0 3 1
A clean() 0 47 6
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
A is_ascii() 0 7 2
A normalize_msword() 0 43 2
A is_blank() 0 8 2
A htmlspecialchars() 0 11 3
A decimal_to_chr() 0 3 1
A pcre_utf8_support() 0 4 1
A codepoints() 0 29 4
A lowerCaseFirst() 0 8 1
A cleanup() 0 25 2
B is_binary() 0 35 9
A intlChar_loaded() 0 3 1
A lcfirst() 0 44 5
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
F extract_text() 0 175 34
A isBom() 0 3 1
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A hasBom() 0 3 1
A iconv_loaded() 0 3 1
A lcwords() 0 31 6
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A is_empty() 0 3 1
B html_encode() 0 42 7
A isUtf16() 0 3 1
F encode() 0 139 37
C is_utf32() 0 65 16
C ord() 0 72 16
A is_alphanumeric() 0 8 2
A json_decode() 0 14 2
B is_json() 0 27 8
A int_to_hex() 0 7 2
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A is_base64() 0 16 5
A hex_to_int() 0 14 3
A htmlentities() 0 23 3
A hex_to_chr() 0 3 1
A isJson() 0 3 1
A min() 0 14 3
C html_entity_decode() 0 86 17
B file_get_contents() 0 55 11
A str_substr_after_first_separator() 0 28 6
A str_begins() 0 3 1
B str_camelize() 0 70 10
A str_contains() 0 10 2
A substr_in_byte() 0 18 6
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 96 19
A str_isubstr_last() 0 20 4
A str_replace_beginning() 0 21 6
A remove_left() 0 21 4
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 101 20
A to_filename() 0 24 2
A str_iends_with() 0 11 3
C utf8_decode() 0 60 13
A remove_html() 0 3 1
B str_longest_common_suffix() 0 51 10
C wordwrap() 0 68 14
B ucfirst() 0 57 7
A str_pad_both() 0 7 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A str_limit() 0 26 6
A string() 0 10 1
B rxClass() 0 39 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 152 5
A str_starts_with() 0 11 3
A str_humanize() 0 15 1
C substr_count_in_byte() 0 54 15
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 19 4
C str_longest_common_substring() 0 73 16
A regex_replace() 0 20 3
A titlecase() 0 24 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 54 10
B urldecode() 0 37 8
A str_isubstr_before_first_separator() 0 16 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A str_matches_pattern() 0 3 1
B str_titleize() 0 55 10
A ws() 0 3 1
A str_replace_first() 0 17 2
A str_pad_right() 0 7 1
B ucwords() 0 48 9
A to_boolean() 0 35 5
C stristr() 0 68 15
A strncasecmp() 0 10 1
B strwidth() 0 40 8
A str_iends() 0 3 1
A trim() 0 19 4
A substr_compare() 0 33 6
C substr_count() 0 62 16
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 18 3
A to_latin1() 0 3 1
A str_replace_ending() 0 21 6
A string_has_bom() 0 10 3
B strtr() 0 33 8
A str_contains_all() 0 23 6
A str_isubstr_after_last_separator() 0 23 5
D range() 0 53 18
B strspn() 0 30 10
B rawurldecode() 0 37 8
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 80 10
A utf8_encode() 0 16 3
C str_detect_encoding() 0 76 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A str_replace() 0 14 1
A substr_iright() 0 15 4
A replace() 0 11 2
A to_iso8859() 0 16 4
A words_limit() 0 17 5
A strip_tags() 0 15 4
A str_isubstr_before_last_separator() 0 21 6
D str_truncate_safe() 0 78 18
A substr_right() 0 31 6
D str_split() 0 125 28
A str_ends_with_any() 0 13 4
F strrpos() 0 118 25
A remove_right() 0 22 4
A remove_html_breaks() 0 3 1
A showSupport() 0 8 2
A remove_invisible_characters() 0 19 3
A single_chr_html_encode() 0 15 4
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 11 3
F to_ascii() 0 150 27
A reduce_string_array() 0 26 6
B str_longest_common_prefix() 0 48 8
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 7 1
A str_substr_first() 0 33 6
A remove_bom() 0 21 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 32 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 23 5
B str_snakeize() 0 55 6
A str_sort() 0 15 3
D to_utf8() 0 117 35
A ucword() 0 3 1
A str_offset_get() 0 14 4
A getDataIfExists() 0 10 2
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 53 11
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A str_ensure_left() 0 11 3
B urldecode_fix_win1252_chars() 0 227 1
A strtonatfold() 0 6 1
C strcspn() 0 51 12
A fixStrCaseHelper() 0 33 5
B str_split_pattern() 0 49 11
D strstr() 0 92 18
A str_isubstr_first() 0 25 4
A str_last_char() 0 13 4
A str_ireplace_beginning() 0 21 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 143 32
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 35 5
A to_utf8_convert_helper() 0 28 5
B strtoupper() 0 54 10
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 6 1
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 26 4
A remove_duplicates() 0 14 4
B str_slice() 0 33 10
F strpos() 0 131 27
A str_shuffle() 0 35 6
A strcmp() 0 9 2
A str_word_count() 0 23 5
A str_to_binary() 0 9 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
14
     * Bom => Byte-Length
15
     *
16
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
17
     *
18
     * @var array
19
     */
20
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // The UTF-8 BOM bytes mis-read as WINDOWS-1252 ("ï", "»", "¿") and stored as UTF-8 again.
        // Written with byte escapes on purpose: the literal characters are easily stripped by
        // tools that treat them as a BOM, which would leave an empty key with a bogus length.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $INTL_TRANSLITERATOR_LIST;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
234
235
    /**
236
     * @var array|null
237
     */
238
    private static $CHR;
239
240
    /**
241
     * __construct()
242
     */
243 32
    public function __construct()
    {
        // Intentionally empty: there is nothing to initialise per instance.
    }
246
247
    /**
248
     * Return the character at the specified position: $str[1] like functionality.
249
     *
250
     * @param string $str      <p>A UTF-8 string.</p>
251
     * @param int    $pos      <p>The position of character to return.</p>
252
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
253
     *
254
     * @return string single multi-byte character
255
     */
256 3
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // An empty string or a negative position never yields a character.
        if ($pos < 0 || $str === '') {
            return '';
        }

        // UTF-8 is served by mb_substr() directly; every other charset is delegated to self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
268
269
    /**
270
     * Prepends UTF-8 BOM character to the string and returns the whole string.
271
     *
272
     * INFO: If BOM already existed there, the Input string is returned.
273
     *
274
     * @param string $str <p>The input string.</p>
275
     *
276
     * @return string the output string that contains BOM
277
     */
278 2
    public static function add_bom_to_string(string $str): string
    {
        // Leave the input untouched unless string_has_bom() reports exactly false.
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
286
287
    /**
288
     * Changes all keys in an array.
289
     *
290
     * @param array  $array    <p>The array to work on</p>
291
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
292
     *                         or <strong>CASE_LOWER</strong> (default)</p>
293
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
294
     *
295
     * @return array an array with its keys lower- or uppercased (values are passed through untouched)
296
     */
297 2
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // Only CASE_UPPER selects upper-casing; CASE_LOWER and any unknown value
        // fall back to lower-casing.
        $toUpper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: the values are only copied into the result, so a
        // by-reference loop would just leave a dangling reference behind.
        foreach ($array as $key => $value) {
            // Keys are cast to string first, so integer keys go through the same conversion.
            $key = $toUpper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            // When two keys differ only by case, the later entry overwrites the earlier one.
            $return[$key] = $value;
        }

        return $return;
    }
318
319
    /**
320
     * Returns the substring between $start and $end, if found, or an empty
321
     * string. An optional offset may be supplied from which to begin the
322
     * search for the start string.
323
     *
324
     * @param string $str
325
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
326
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
327
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
328
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
329
     *
330
     * @return string
331
     */
332 16
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // Generic path: normalise the charset name, then use the class' own string helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $from = self::strpos($str, $start, $offset, $encoding);
            if ($from === false) {
                return '';
            }

            // Move past the start delimiter; the wanted text begins here.
            $from += (int) self::strlen($start, $encoding);
            $to = self::strpos($str, $end, $from, $encoding);

            // No end delimiter, or nothing between the delimiters.
            return ($to === false || $to === $from)
                ? ''
                : (string) self::substr($str, $from, $to - $from, $encoding);
        }

        // UTF-8 path: plain mbstring calls.
        $from = \mb_strpos($str, $start, $offset);
        if ($from === false) {
            return '';
        }

        // Move past the start delimiter; the wanted text begins here.
        $from += (int) \mb_strlen($start);
        $to = \mb_strpos($str, $end, $from);

        // No end delimiter, or nothing between the delimiters.
        return ($to === false || $to === $from)
            ? ''
            : (string) \mb_substr($str, $from, $to - $from);
    }
382
383
    /**
384
     * Convert binary into a string.
385
     *
386
     * @param mixed $bin 1|0
387
     *
388
     * @return string
389
     */
390 2
    public static function binary_to_str($bin): string
    {
        // Offset 0 is not set for an empty string (or a value without offsets): nothing to convert.
        if (isset($bin[0]) === false) {
            return '';
        }

        // Base 2 -> base 16, so the digits can be packed as hex nibbles.
        $hex = \base_convert($bin, 2, 16);

        // A value of zero is treated as "no data".
        return $hex === '0' ? '' : \pack('H*', $hex);
    }
403
404
    /**
405
     * Returns the UTF-8 Byte Order Mark Character.
406
     *
407
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
408
     *
409
     * @return string UTF-8 Byte Order Mark
410
     */
411 4
    public static function bom(): string
    {
        // The three bytes EF BB BF form the UTF-8 byte order mark.
        $utf8Bom = "\xEF\xBB\xBF";

        return $utf8Bom;
    }
415
416
    /**
417
     * @alias of UTF8::chr_map()
418
     *
419
     * @param array|string $callback
420
     * @param string       $str
421
     *
422
     * @return string[]
423
     *
424
     * @see UTF8::chr_map()
425
     */
426 2
    public static function callback($callback, string $str): array
    {
        // Pure alias: the work is done by chr_map().
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
430
431
    /**
432
     * Returns the character at $index, with indexes starting at 0.
433
     *
434
     * @param string $str      <p>The input string.</p>
435
     * @param int    $index    <p>Position of the character.</p>
436
     * @param string $encoding [optional] <p>Default is UTF-8</p>
437
     *
438
     * @return string the character at $index
439
     */
440 9
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // UTF-8 is served by mb_substr() directly; every other charset is delegated to self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
448
449
    /**
450
     * Returns an array consisting of the characters in the string.
451
     *
452
     * @param string $str <p>The input string.</p>
453
     *
454
     * @return string[] an array of chars
455
     */
456 3
    public static function chars(string $str): array
    {
        // Splitting with str_split()'s default arguments gives the list of characters.
        $characters = self::str_split($str);

        return $characters;
    }
460
461
    /**
462
     * This method will auto-detect your server environment for UTF-8 support.
463
     *
464
     * @return true|null
465
     *
466
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
467
     */
468 5
    public static function checkForSupport()
    {
        // The probe runs once; every later call is a no-op that returns null.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // Switch mbstring to UTF-8 and record that we did so.
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // When the polyfill is reported as in use, set the internal encoding as well.
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
516
517
    /**
518
     * Generates a UTF-8 encoded character from the given code point.
519
     *
520
     * INFO: opposite to UTF8::ord()
521
     *
522
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
523
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
524
     *
525
     * @return string|null multi-byte character, returns null on failure or empty input
526
     */
527 25
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // Per-request memo of already generated characters, keyed by code point + encoding.
        static $CHAR_CACHE = [];

        // "UTF-8" and "CP850" are used as given; any other name is normalised first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Warn when the target charset is none of the three listed here and mbstring is missing.
        if (
            \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true) === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) {
            // use "simple"-char only until "\x80": direct lookup in the byte table
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            // fallback via "IntlChar"
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            // fallback via vanilla php: assemble the UTF-8 byte sequence by hand
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            $code_point = (int) $code_point;

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            if ($code_point <= 0x7F) {
                // 1 byte
                $chr = self::$CHR[$code_point];
            } elseif ($code_point <= 0x7FF) {
                // 2 bytes: 110xxxxx 10xxxxxx
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        // The character was produced as UTF-8; convert it when another charset was requested.
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
629
630
    /**
631
     * Applies callback to all characters of a string.
632
     *
633
     * @param array|string $callback <p>The callback function.</p>
634
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
635
     *
636
     * @return string[] the outcome of callback
637
     */
638 2
    public static function chr_map($callback, string $str): array
    {
        // Split first, then run the callback over every piece.
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
645
646
    /**
647
     * Generates an array of byte length of each character of a Unicode string.
648
     *
649
     * 1 byte => U+0000  - U+007F
650
     * 2 byte => U+0080  - U+07FF
651
     * 3 byte => U+0800  - U+FFFF
652
     * 4 byte => U+10000 - U+10FFFF
653
     *
654
     * @param string $str <p>The original unicode string.</p>
655
     *
656
     * @return int[] an array of byte lengths of each character
657
     */
658 4
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // Pick the byte counter once, then apply it to every character.
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            $byteLength = static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byteLength = '\strlen';
        }

        return \array_map($byteLength, self::str_split($str));
    }
676
677
    /**
678
     * Get a decimal code representation of a specific character.
679
     *
680
     * @param string $char <p>The input character.</p>
681
     *
682
     * @return int
683
     */
684 4
    public static function chr_to_decimal(string $char): int
    {
        // Reads the first byte. "??" turns a missing byte into '' without raising an
        // "Uninitialized string offset" diagnostic, so an empty $char passes the same
        // value to ord() as a direct offset read would.
        $code = self::ord($char[0] ?? '');

        if (!($code & 0x80)) {
            // 0xxxxxxx: high bit clear, the byte value is the result
            return $code;
        }

        // Work out the sequence length from the lead byte and strip its marker bits.
        $bytes = 1;
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // Fold in the following bytes, six payload bits each.
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx - a byte missing from a truncated sequence is read as ''
            $code = ($code << 6) + (self::ord($char[$i - 1] ?? '') & ~0x80);
        }

        return $code;
    }
715
716
    /**
717
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
718
     *
719
     * @param int|string $char <p>The input character</p>
720
     * @param string     $pfix [optional]
721
     *
722
     * @return string The code point encoded as U+xxxx
723
     */
724 2
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        // Empty input yields an empty result (no prefix is added).
        if ($char === '') {
            return '';
        }

        // The HTML entity for NUL is swapped for an empty string before the
        // code point lookup; everything else is looked up as a string.
        $input = $char === '&#0;' ? '' : (string) $char;

        $codePoint = self::ord($input);

        return self::int_to_hex($codePoint, $pfix);
    }
736
737
    /**
738
     * alias for "UTF8::chr_to_decimal()"
739
     *
740
     * @param string $chr
741
     *
742
     * @return int
743
     *
744
     * @see UTF8::chr_to_decimal()
745
     */
746 2
    public static function chr_to_int(string $chr): int
    {
        // Pure alias: all work is done by chr_to_decimal().
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
750
751
    /**
752
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
753
     *
754
     * @param string $body     <p>The original string to be split.</p>
755
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
756
     * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
757
     *
758
     * @return string the chunked string
759
     */
760 4
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        // Cut the body into pieces via str_split() and glue them back together
        // with $end between the pieces; nothing is appended after the last piece.
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
764
765
    /**
766
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
767
     *
768
     * @param string $str                           <p>The string to be sanitized.</p>
769
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
770
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
771
     *                                              whitespace.</p>
772
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
773
     *                                              e.g.: "…"
774
     *                                              => "..."</p>
775
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
776
     *                                              combination with
777
     *                                              $normalize_whitespace</p>
778
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
779
     *                                              mark e.g.: "�"</p>
780
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
781
     *                                              characters e.g.: "\0"</p>
782
     *
783
     * @return string clean UTF-8 encoded string
784
     */
785 114
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs (at most 100 sequences per match) of bytes that
        // are shaped like 1- to 4-byte UTF-8 sequences. Any other byte matches
        // group 2 or 3 and is dropped, because only "$1" is written back.
        $pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($pattern, '$1', $str);

        // The optional passes below run in this fixed order.
        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
833
834
    /**
835
     * Clean up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
836
     *
837
     * @param string $str <p>The input string.</p>
838
     *
839
     * @return string
840
     */
841 33
    public static function cleanup($str): string
    {
        // The parameter is untyped, so force it to a string first.
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // Step 1: fix ISO <-> UTF-8 errors.
        $fixed = self::fix_simple_utf8($input);

        // Step 2: run clean() with a fixed option set. Naming the flags here
        // documents what each positional argument switches on or off.
        $removeBom = true;
        $normalizeWhitespace = true;
        $normalizeMsWord = false;
        $keepNonBreakingSpace = true;
        $replaceDiamondQuestionMark = true;
        $removeInvisibleCharacters = true;

        return self::clean(
            $fixed,
            $removeBom,
            $normalizeWhitespace,
            $normalizeMsWord,
            $keepNonBreakingSpace,
            $replaceDiamondQuestionMark,
            $removeInvisibleCharacters
        );
    }
868
869
    /**
870
     * Accepts a string or an array of strings and returns an array of Unicode code points.
871
     *
872
     * INFO: opposite to UTF8::string()
873
     *
874
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
875
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
876
     *                                 default, code points will be returned as integers.</p>
877
     *
878
     * @return array<int|string>
879
     *                           The array of code points:<br>
880
     *                           array<int> for $u_style === false<br>
881
     *                           array<string> for $u_style === true<br>
882
     */
883 12
    public static function codepoints($arg, bool $u_style = false): array
    {
        // A plain string is split up first; an array is used as given.
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        // Map every entry to its code point via self::ord().
        $points = \array_map([self::class, 'ord'], $chars);
        if ($points === []) {
            return [];
        }

        if (!$u_style) {
            return $points;
        }

        // U+xxxx style requested: format each integer with int_to_hex().
        return \array_map([self::class, 'int_to_hex'], $points);
    }
913
914
    /**
915
     * Trims the string and replaces consecutive whitespace characters with a
916
     * single space. This includes tabs and newline characters, as well as
917
     * multibyte whitespace such as the thin space and ideographic space.
918
     *
919
     * @param string $str <p>The input string.</p>
920
     *
921
     * @return string string with a trimmed $str and condensed whitespace
922
     */
923 13
    public static function collapse_whitespace(string $str): string
    {
        // Without mbstring, fall back to the class' own regex_replace().
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        // Replace each whitespace run with one space, then trim the ends.
        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($collapsed);
    }
932
933
    /**
934
     * Returns count of characters used in a string.
935
     *
936
     * @param string $str                <p>The input string.</p>
937
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
938
     * @param bool   $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use "mb_" functions.</p>
939
     *
940
     * @return int[] an associative array of Character as keys and
941
     *               their count as values
942
     */
943 19
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        // Split into pieces of length 1, forwarding both option flags ...
        $chars = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        // ... and count how often each piece occurs (piece => count).
        return \array_count_values($chars);
    }
957
958
    /**
959
     * Remove css media-queries.
960
     *
961
     * @param string $str
962
     *
963
     * @return string
964
     */
965 1
    public static function css_stripe_media_queries(string $str): string
    {
        // Matches an "@media ..." rule including its nested "{ ... }" body.
        $pattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        // preg_replace() yields null on failure; the cast turns that into ''.
        $stripped = \preg_replace($pattern, '', $str);

        return (string) $stripped;
    }
973
974
    /**
975
     * Checks whether ctype is available on the server.
976
     *
977
     * @return bool
978
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
979
     */
980
    public static function ctype_loaded(): bool
    {
        // Ask the engine directly whether the "ctype" extension is loaded.
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
984
985
    /**
986
     * Converts an int value into a UTF-8 character.
987
     *
988
     * @param mixed $int
989
     *
990
     * @return string
991
     */
992 19
    public static function decimal_to_chr($int): string
    {
        // Build the numeric HTML entity for the value and let the entity
        // decoder turn it into the character.
        $entity = '&#' . $int . ';';
        $flags = \ENT_QUOTES | \ENT_HTML5;

        return self::html_entity_decode($entity, $flags);
    }
996
997
    /**
998
     * Decodes a MIME header field
999
     *
1000
     * @param string $str
1001
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1002
     *
1003
     * @return false|string
1004
     *                      A decoded MIME field on success,
1005
     *                      or false if an error occurs during the decoding
1006
     */
1007
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // 'UTF-8' and 'CP850' are taken as-is; any other name is normalized
        // (with 'UTF-8' as the fallback).
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Preferred path: iconv, continuing past malformed parts of the header.
        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // Fallback path: re-encode the input for non-UTF-8 targets, then decode
        // with mb_decode_mimeheader().
        $input = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($input);
    }
1023
1024
    /**
1025
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1026
     *
1027
     * @param string $str                        <p>The input string.</p>
1028
     * @param bool   $useReversibleStringMapping [optional] <p>
1029
     *                                           When <b>TRUE</b>, we use a reversible string mapping
1030
     *                                           between "emoji_encode" and "emoji_decode".</p>
1031
     *
1032
     * @return string
1033
     */
1034 9
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // Pick which key list is searched for; the replacement list is the
        // values cache in both modes.
        $search = $useReversibleStringMapping
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $search,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1052
1053
    /**
1054
     * Encode a string with emoji chars into a non-emoji string.
1055
     *
1056
     * @param string $str                        <p>The input string</p>
1057
     * @param bool   $useReversibleStringMapping [optional] <p>
1058
     *                                           when <b>TRUE</b>, we use a reversible string mapping
1059
     *                                           between "emoji_encode" and "emoji_decode"</p>
1060
     *
1061
     * @return string
1062
     */
1063 12
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // The values cache is always what is searched for; only the list of
        // replacement keys depends on the requested mapping.
        $replacements = $useReversibleStringMapping
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacements,
            $str
        );
    }
1081
1082
    /**
1083
     * Encode a string with a new charset-encoding.
1084
     *
1085
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
1086
     *        so you can call this function also on a UTF-8 String and you don't mess the string.
1087
     *
1088
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1089
     * @param string $str                    <p>The input string</p>
1090
     * @param bool   $autodetectFromEncoding [optional] <p>Force the new encoding (we try to fix broken / double
1091
     *                                       encoding for UTF-8)<br> otherwise we auto-detect the current
1092
     *                                       string-encoding</p>
1093
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1094
     *                                       A empty string will trigger the autodetect anyway.</p>
1095
     *
1096
     * @return string
1097
     *
1098
     * @psalm-suppress InvalidReturnStatement
1099
     */
1100 28
    public static function encode(
1101
        string $toEncoding,
1102
        string $str,
1103
        bool $autodetectFromEncoding = true,
1104
        string $fromEncoding = ''
1105
    ): string {
1106 28
        // Nothing to convert, or no target encoding given: hand the input back untouched.
        if ($str === '' || $toEncoding === '') {
1107 13
            return $str;
1108
        }
1109
1110 28
        // 'UTF-8' and 'CP850' are used as-is; every other target name is normalized
        // (with 'UTF-8' as the fallback).
        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
1111 7
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
1112
        }
1113
1114 28
        // An empty source encoding is left empty here; it triggers detection further down.
        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
1115 2
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
1116
        }
1117
1118
        // Source and target are both known and identical: no conversion needed.
        if (
1119 28
            $toEncoding
1120
            &&
1121 28
            $fromEncoding
1122
            &&
1123 28
            $fromEncoding === $toEncoding
1124
        ) {
1125
            return $str;
1126
        }
1127
1128 28
        // 'JSON', 'BASE64' and 'HTML-ENTITIES' are handled as pseudo-encodings:
        // as a target they return right away; as a source the input is decoded
        // first and $fromEncoding is reset to '' so the decoded text goes through
        // the detection below.
        if ($toEncoding === 'JSON') {
1129 1
            $return = self::json_encode($str);
1130 1
            if ($return === false) {
1131
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
1132
            }
1133
1134 1
            return $return;
1135
        }
1136 28
        if ($fromEncoding === 'JSON') {
1137 1
            // NOTE(review): the decoded JSON value is assigned to $str as-is; if it is
            // not a string, the later string-typed calls receive a non-string — confirm.
            $str = self::json_decode($str);
1138 1
            $fromEncoding = '';
1139
        }
1140
1141 28
        if ($toEncoding === 'BASE64') {
1142 2
            return \base64_encode($str);
1143
        }
1144 28
        if ($fromEncoding === 'BASE64') {
1145 2
            // NOTE(review): in strict mode base64_decode() returns false for invalid
            // input; that case is not checked before $str is used below — confirm.
            $str = \base64_decode($str, true);
1146 2
            $fromEncoding = '';
1147
        }
1148
1149 28
        if ($toEncoding === 'HTML-ENTITIES') {
1150 2
            return self::html_encode($str, true, 'UTF-8');
1151
        }
1152 28
        if ($fromEncoding === 'HTML-ENTITIES') {
1153 2
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
1154 2
            $fromEncoding = '';
1155
        }
1156
1157 28
        // Detect the source encoding when asked to, or when none is known.
        $fromEncodingDetected = false;
1158
        if (
1159 28
            $autodetectFromEncoding === true
1160
            ||
1161 28
            !$fromEncoding
1162
        ) {
1163 28
            $fromEncodingDetected = self::str_detect_encoding($str);
1164
        }
1165
1166
        // DEBUG
1167
        //var_dump($toEncoding, $fromEncoding, $fromEncodingDetected, $str, "\n\n");
1168
1169 28
        // A successful detection overrides any caller-supplied $fromEncoding.
        if ($fromEncodingDetected !== false) {
1170 24
            $fromEncoding = $fromEncodingDetected;
1171 7
        } elseif ($autodetectFromEncoding === true) {
1172
            // fallback for the "autodetect"-mode
            // NOTE(review): this returns the to_utf8() result whatever $toEncoding is.
1173 7
            return self::to_utf8($str);
1174
        }
1175
1176
        // Still no source encoding, or it already equals the target: nothing to do.
        if (
1177 24
            !$fromEncoding
1178
            ||
1179 24
            $fromEncoding === $toEncoding
1180
        ) {
1181 15
            return $str;
1182
        }
1183
1184
        // WINDOWS-1252 / ISO-8859-1 -> UTF-8 is delegated to to_utf8().
        if (
1185 19
            $toEncoding === 'UTF-8'
1186
            &&
1187
            (
1188 17
                $fromEncoding === 'WINDOWS-1252'
1189
                ||
1190 19
                $fromEncoding === 'ISO-8859-1'
1191
            )
1192
        ) {
1193 13
            return self::to_utf8($str);
1194
        }
1195
1196
        // WINDOWS-1252 / UTF-8 -> ISO-8859-1 is delegated to to_iso8859().
        if (
1197 12
            $toEncoding === 'ISO-8859-1'
1198
            &&
1199
            (
1200 6
                $fromEncoding === 'WINDOWS-1252'
1201
                ||
1202 12
                $fromEncoding === 'UTF-8'
1203
            )
1204
        ) {
1205 6
            return self::to_iso8859($str);
1206
        }
1207
1208
        // Any other target needs mbstring; without it a warning is raised, but
        // execution continues to the iconv() attempt below.
        if (
1209 10
            $toEncoding !== 'UTF-8'
1210
            &&
1211 10
            $toEncoding !== 'ISO-8859-1'
1212
            &&
1213 10
            $toEncoding !== 'WINDOWS-1252'
1214
            &&
1215 10
            self::$SUPPORT['mbstring'] === false
1216
        ) {
1217
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
1218
        }
1219
1220 10
        if (self::$SUPPORT['mbstring'] === true) {
1221
            // warning: do not use the symfony polyfill here
1222 10
            $strEncoded = \mb_convert_encoding(
1223 10
                $str,
1224 10
                $toEncoding,
1225 10
                $fromEncoding
1226
            );
1227
1228 10
            // Truthiness check: a falsy result (false, '' or '0') falls through to iconv().
            if ($strEncoded) {
1229 10
                return $strEncoded;
1230
            }
1231
        }
1232
1233
        // Last conversion attempt via iconv().
        $return = \iconv($fromEncoding, $toEncoding, $str);
1234
        if ($return !== false) {
1235
            return $return;
1236
        }
1237
1238
        // Every conversion attempt failed: return the (possibly pre-decoded) input.
        return $str;
1239
    }
1240
1241
    /**
1242
     * @param string $str
1243
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
1244
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
1245
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
1246
     * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
1247
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
1248
     *
1249
     * @return false|string
1250
     *                      An encoded MIME field on success,
1251
     *                      or false if an error occurs during the encoding
1252
     */
1253
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // The default line break is now a real CR LF. The previous single-quoted
        // default was the four literal characters backslash, "r", backslash, "n",
        // which ended up verbatim inside folded header lines.

        // 'UTF-8' and 'CP850' are used as-is; any other charset name is
        // normalized first (with 'UTF-8' as the fallback).
        if ($fromCharset !== 'UTF-8' && $fromCharset !== 'CP850') {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if ($toCharset !== 'UTF-8' && $toCharset !== 'CP850') {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        // The field name is passed as an empty string; only $str is encoded
        // as the field value. Returns false when iconv cannot encode it.
        return \iconv_mime_encode(
            '',
            $str,
            [
                'scheme'           => $transferEncoding,
                'line-length'      => $indent,
                'input-charset'    => $fromCharset,
                'output-charset'   => $toCharset,
                'line-break-chars' => $linefeed,
            ]
        );
    }
1281
1282
    /**
1283
     * Create an extract from a sentence; if the search-string was found, it tries to center it in the output.
1284
     *
1285
     * @param string   $str                    <p>The input string.</p>
1286
     * @param string   $search                 <p>The searched string.</p>
1287
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
1288
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
1289
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
1290
     *
1291
     * @return string
1292
     */
1293 1
    public static function extract_text(
1294
        string $str,
1295
        string $search = '',
1296
        int $length = null,
1297
        string $replacerForSkippedText = '…',
1298
        string $encoding = 'UTF-8'
1299
    ): string {
1300 1
        if ($str === '') {
1301 1
            return '';
1302
        }
1303
1304 1
        // 'UTF-8' and 'CP850' are used as-is; any other name is normalized.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
1305
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
1306
        }
1307
1308 1
        // Characters stripped from the cut edges of the extract (see the trim calls below).
        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";
1309
1310 1
        // Default extract length: half of the string length, rounded.
        if ($length === null) {
1311 1
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
1312
        }
1313
1314 1
        // --- No search term: cut from the start of the string. ---
        if ($search === '') {
1315 1
            if ($encoding === 'UTF-8') {
1316 1
                // $end is the offset the boundary lookup starts at: $length - 1,
                // capped at the string length.
                if ($length > 0) {
1317 1
                    $stringLength = (int) \mb_strlen($str);
1318 1
                    $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
1319
                } else {
1320 1
                    $end = 0;
1321
                }
1322
1323 1
                // NOTE(review): when either lookup fails it yields false, and min() of
                // false and an int evaluates to false, so $pos becomes 0 unless both a
                // space and a dot are found. Current output depends on this — confirm
                // before changing.
                $pos = (int) \min(
1324 1
                    \mb_strpos($str, ' ', $end),
1325 1
                    \mb_strpos($str, '.', $end)
1326
                );
1327
            } else {
1328
                // Same logic as above, routed through the encoding-aware class helpers.
                if ($length > 0) {
1329
                    $stringLength = (int) self::strlen($str, $encoding);
1330
                    $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
1331
                } else {
1332
                    $end = 0;
1333
                }
1334
1335
                $pos = (int) \min(
1336
                    self::strpos($str, ' ', $end, $encoding),
1337
                    self::strpos($str, '.', $end, $encoding)
1338
                );
1339
            }
1340
1341 1
            // A boundary was found: keep everything before it and mark the cut.
            if ($pos) {
1342 1
                if ($encoding === 'UTF-8') {
1343 1
                    $strSub = \mb_substr($str, 0, $pos);
1344
                } else {
1345
                    $strSub = self::substr($str, 0, $pos, $encoding);
1346
                }
1347
1348 1
                if ($strSub === false) {
1349
                    return '';
1350
                }
1351
1352 1
                return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
1353
            }
1354
1355
            // No boundary position: return the whole string unchanged.
            return $str;
1356
        }
1357
1358 1
        // --- Search term given: try to centre the extract around the match. ---
        // $wordPos is the case-insensitive match position (0 when not found, because
        // of the int cast); $halfSide is that position shifted left by half the
        // extract length and right by half the search length.
        if ($encoding === 'UTF-8') {
1359 1
            $wordPos = (int) \mb_stripos($str, $search);
1360 1
            $halfSide = (int) ($wordPos - $length / 2 + (int) \mb_strlen($search) / 2);
1361
        } else {
1362
            $wordPos = (int) self::stripos($str, $search, 0, $encoding);
1363
            $halfSide = (int) ($wordPos - $length / 2 + (int) self::strlen($search, $encoding) / 2);
1364
        }
1365
1366 1
        // $pos_start: the larger of the last-space and last-dot positions within the
        // first $halfSide characters (stays 0 when $halfSide is not positive).
        $pos_start = 0;
1367 1
        if ($halfSide > 0) {
1368 1
            if ($encoding === 'UTF-8') {
1369 1
                $halfText = \mb_substr($str, 0, $halfSide);
1370
            } else {
1371
                $halfText = self::substr($str, 0, $halfSide, $encoding);
1372
            }
1373 1
            if ($halfText !== false) {
1374 1
                if ($encoding === 'UTF-8') {
1375 1
                    $pos_start = (int) \max(
1376 1
                        \mb_strrpos($halfText, ' '),
1377 1
                        \mb_strrpos($halfText, '.')
1378
                    );
1379
                } else {
1380
                    $pos_start = (int) \max(
1381
                        self::strrpos($halfText, ' ', 0, $encoding),
1382
                        self::strrpos($halfText, '.', 0, $encoding)
1383
                    );
1384
                }
1385
            }
1386
        }
1387
1388 1
        // Match lies far enough into the string: cut at both sides.
        // A $wordPos of 0 (match at the very start, or no match) takes the else branch.
        if ($wordPos && $halfSide > 0) {
1389 1
            // Offset the end-boundary lookup starts at, capped at the string length.
            $offset = $pos_start + $length - 1;
1390 1
            $realLength = (int) self::strlen($str, $encoding);
1391
1392 1
            if ($offset > $realLength) {
1393
                $offset = $realLength;
1394
            }
1395
1396 1
            // $pos_end is a length relative to $pos_start, not an absolute position.
            // NOTE(review): same min()-with-false behaviour as in the no-search branch;
            // a failed lookup turns the min() into 0, making $pos_end <= 0.
            if ($encoding === 'UTF-8') {
1397 1
                $pos_end = (int) \min(
1398 1
                    \mb_strpos($str, ' ', $offset),
1399 1
                    \mb_strpos($str, '.', $offset)
1400 1
                    ) - $pos_start;
1401
            } else {
1402
                $pos_end = (int) \min(
1403
                    self::strpos($str, ' ', $offset, $encoding),
1404
                    self::strpos($str, '.', $offset, $encoding)
1405
                    ) - $pos_start;
1406
            }
1407
1408 1
            if (!$pos_end || $pos_end <= 0) {
1409 1
                // No usable end boundary: take everything from $pos_start to the end
                // and mark only the leading cut.
                if ($encoding === 'UTF-8') {
1410 1
                    $strSub = \mb_substr($str, $pos_start, (int) \mb_strlen($str));
1411
                } else {
1412
                    $strSub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
1413
                }
1414 1
                if ($strSub !== false) {
1415 1
                    $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
1416
                } else {
1417 1
                    $extract = '';
1418
                }
1419
            } else {
1420 1
                // Both boundaries known: take the middle part and mark both cuts.
                if ($encoding === 'UTF-8') {
1421 1
                    $strSub = \mb_substr($str, $pos_start, $pos_end);
1422
                } else {
1423
                    $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
1424
                }
1425 1
                if ($strSub !== false) {
1426 1
                    $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
1427
                } else {
1428 1
                    $extract = '';
1429
                }
1430
            }
1431
        } else {
1432 1
            // Match at/near the start (or no match): cut only at the end, starting
            // the boundary lookup at $length - 1 (capped at the string length).
            $offset = $length - 1;
1433 1
            $trueLength = (int) self::strlen($str, $encoding);
1434
1435 1
            if ($offset > $trueLength) {
1436
                $offset = $trueLength;
1437
            }
1438
1439 1
            if ($encoding === 'UTF-8') {
1440 1
                $pos_end = (int) \min(
1441 1
                    \mb_strpos($str, ' ', $offset),
1442 1
                    \mb_strpos($str, '.', $offset)
1443
                );
1444
            } else {
1445
                $pos_end = (int) \min(
1446
                    self::strpos($str, ' ', $offset, $encoding),
1447
                    self::strpos($str, '.', $offset, $encoding)
1448
                );
1449
            }
1450
1451 1
            if ($pos_end) {
1452 1
                if ($encoding === 'UTF-8') {
1453 1
                    $strSub = \mb_substr($str, 0, $pos_end);
1454
                } else {
1455
                    $strSub = self::substr($str, 0, $pos_end, $encoding);
1456
                }
1457 1
                if ($strSub !== false) {
1458 1
                    $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
1459
                } else {
1460 1
                    $extract = '';
1461
                }
1462
            } else {
1463 1
                // No end boundary position: the whole string is the extract.
                $extract = $str;
1464
            }
1465
        }
1466
1467 1
        return $extract;
1468
    }
1469
1470
    /**
1471
     * Reads entire file into a string.
1472
     *
1473
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1474
     *
1475
     * @see http://php.net/manual/en/function.file-get-contents.php
1476
     *
1477
     * @param string        $filename         <p>
1478
     *                                        Name of the file to read.
1479
     *                                        </p>
1480
     * @param bool          $use_include_path [optional] <p>
1481
     *                                        Prior to PHP 5, this parameter is called
1482
     *                                        use_include_path and is a bool.
1483
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1484
     *                                        to trigger include path
1485
     *                                        search.
1486
     *                                        </p>
1487
     * @param resource|null $context          [optional] <p>
1488
     *                                        A valid context resource created with
1489
     *                                        stream_context_create. If you don't need to use a
1490
     *                                        custom context, you can skip this parameter by &null;.
1491
     *                                        </p>
1492
     * @param int|null      $offset           [optional] <p>
1493
     *                                        The offset where the reading starts.
1494
     *                                        </p>
1495
     * @param int|null      $maxLength        [optional] <p>
1496
     *                                        Maximum length of data read. The default is to read until end
1497
     *                                        of file is reached.
1498
     *                                        </p>
1499
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1500
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1501
     *                                        some files, because they used non default utf-8 chars. Binary files
1502
     *                                        like images or pdf will not be converted.</p>
1503
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1504
     *                                        A empty string will trigger the autodetect anyway.</p>
1505
     *
1506
     * @return false|string the function returns the read data as string or <b>false</b> on failure
1507
     */
1508 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // Sanitize the path before it is handed to the stream layer.
        // NOTE(review): FILTER_SANITIZE_STRING strips tags and encodes quotes, so file
        // names containing such characters are altered - confirm this is intended.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        // A negative length can never be satisfied: the native function answers it with
        // a warning (PHP 7) or a ValueError (PHP 8). Report it as an ordinary failure so
        // that the "false on failure" contract of this wrapper holds on every version.
        if ($maxLength !== null && $maxLength < 0) {
            return false;
        }

        // Only build a context for the timeout when the caller did not supply one.
        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // The length argument is only passed along when it was given, otherwise the
        // native function reads until the end of the file.
        if ($maxLength !== null) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 === true) {
            // Convert when the data is not flagged as binary, or when it is detected
            // as UTF-16 / UTF-32.
            if (
                self::is_binary($data, true) !== true
                ||
                self::is_utf16($data, false) !== false
                ||
                self::is_utf32($data, false) !== false
            ) {
                $data = self::encode('UTF-8', $data, false, $fromEncoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1564
1565
    /**
     * Tells whether the content of a file begins with a BOM (Byte Order Mark).
     *
     * @param string $file_path <p>Path of the file to inspect.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> when the file content starts with a BOM, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        // only a failed read yields false, an empty file still yields a string
        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1584
1585
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively (every element / iterable property is
     * filtered), strings are normalized and every other type is returned unchanged.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>Normalization form that is handed to "\Normalizer".</p>
     * @param string $leading_combining  <p>Prefix that is put in front of a string which
     *                                   starts with a combining mark.</p>
     *
     * @return mixed the filtered value, of the same kind as the input
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        switch (\gettype($var)) {
            case 'array':
            case 'object':
                /** @noinspection ForeachSourceInspection */
                foreach ($var as &$v) {
                    $v = self::filter($v, $normalization_form, $leading_combining);
                }
                unset($v);

                break;
            case 'string':

                if (\strpos($var, "\r") !== false) {
                    // Workaround https://bugs.php.net/65732
                    $var = self::normalize_line_ending($var);
                }

                if (self::is_ascii($var) === false) {
                    if (\Normalizer::isNormalized($var, $normalization_form)) {
                        // placeholder: nothing had to be normalized, but "isset($n[0])"
                        // must still succeed in the check below
                        $n = '-';
                    } else {
                        $n = \Normalizer::normalize($var, $normalization_form);

                        if (isset($n[0])) {
                            $var = $n;
                        } else {
                            // normalization gave nothing usable, re-encode the input instead
                            $var = self::encode('UTF-8', $var, true);
                        }
                    }

                    if (
                        // the offsets are checked first, so that an empty string
                        // never triggers an "Uninitialized string offset" error
                        isset($var[0], $n[0], $leading_combining[0])
                        &&
                        $var[0] >= "\x80"
                        &&
                        \preg_match('/^\\p{Mn}/u', $var)
                    ) {
                        // Prevent leading combining chars
                        // for NFC-safe concatenations.
                        $var = $leading_combining . $var;
                    }
                }

                break;
        }

        return $var;
    }
1651
1652
    /**
1653
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1654
     *
1655
     * Gets a specific external variable by name and optionally filters it
1656
     *
1657
     * @see http://php.net/manual/en/function.filter-input.php
1658
     *
1659
     * @param int    $type          <p>
1660
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1661
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1662
     *                              <b>INPUT_ENV</b>.
1663
     *                              </p>
1664
     * @param string $variable_name <p>
1665
     *                              Name of a variable to get.
1666
     *                              </p>
1667
     * @param int    $filter        [optional] <p>
1668
     *                              The ID of the filter to apply. The
1669
     *                              manual page lists the available filters.
1670
     *                              </p>
1671
     * @param mixed  $options       [optional] <p>
1672
     *                              Associative array of options or bitwise disjunction of flags. If filter
1673
     *                              accepts options, flags can be provided in "flags" field of array.
1674
     *                              </p>
1675
     *
1676
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1677
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1678
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1679
     */
1680
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // $options is only forwarded when the caller really supplied it, otherwise
        // the native function is left with its own default.
        if (\func_num_args() >= 4) {
            return self::filter(\filter_input($type, $variable_name, $filter, $options));
        }

        return self::filter(\filter_input($type, $variable_name, $filter));
    }
1694
1695
    /**
1696
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1697
     *
1698
     * Gets external variables and optionally filters them
1699
     *
1700
     * @see http://php.net/manual/en/function.filter-input-array.php
1701
     *
1702
     * @param int   $type       <p>
1703
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1704
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1705
     *                          <b>INPUT_ENV</b>.
1706
     *                          </p>
1707
     * @param mixed $definition [optional] <p>
1708
     *                          An array defining the arguments. A valid key is a string
1709
     *                          containing a variable name and a valid value is either a filter type, or an array
1710
     *                          optionally specifying the filter, flags and options. If the value is an
1711
     *                          array, valid keys are filter which specifies the
1712
     *                          filter type,
1713
     *                          flags which specifies any flags that apply to the
1714
     *                          filter, and options which specifies any options that
1715
     *                          apply to the filter. See the example below for a better understanding.
1716
     *                          </p>
1717
     *                          <p>
1718
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1719
     *                          input array are filtered by this filter.
1720
     *                          </p>
1721
     * @param bool  $add_empty  [optional] <p>
1722
     *                          Add missing keys as <b>NULL</b> to the return value.
1723
     *                          </p>
1724
     *
1725
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1726
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1727
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1728
     *               is not set and <b>NULL</b> if the filter fails.
1729
     */
1730
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native defaults apply; as soon as a definition
        // is supplied, both optional arguments are forwarded.
        if (\func_num_args() >= 2) {
            return self::filter(\filter_input_array($type, $definition, $add_empty));
        }

        return self::filter(\filter_input_array($type));
    }
1740
1741
    /**
1742
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1743
     *
1744
     * Filters a variable with a specified filter
1745
     *
1746
     * @see http://php.net/manual/en/function.filter-var.php
1747
     *
1748
     * @param mixed $variable <p>
1749
     *                        Value to filter.
1750
     *                        </p>
1751
     * @param int   $filter   [optional] <p>
1752
     *                        The ID of the filter to apply. The
1753
     *                        manual page lists the available filters.
1754
     *                        </p>
1755
     * @param mixed $options  [optional] <p>
1756
     *                        Associative array of options or bitwise disjunction of flags. If filter
1757
     *                        accepts options, flags can be provided in "flags" field of array. For
1758
     *                        the "callback" filter, callable type should be passed. The
1759
     *                        callback must accept one argument, the value to be filtered, and return
1760
     *                        the value after filtering/sanitizing it.
1761
     *                        </p>
1762
     *                        <p>
1763
     *                        <code>
1764
     *                        // for filters that accept options, use this format
1765
     *                        $options = array(
1766
     *                        'options' => array(
1767
     *                        'default' => 3, // value to return if the filter fails
1768
     *                        // other options here
1769
     *                        'min_range' => 0
1770
     *                        ),
1771
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1772
     *                        );
1773
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1774
     *                        // for filter that only accept flags, you can pass them directly
1775
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1776
     *                        // for filter that only accept flags, you can also pass as an array
1777
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1778
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1779
     *                        // callback validate filter
1780
     *                        function foo($value)
1781
     *                        {
1782
     *                        // Expected format: Surname, GivenNames
1783
     *                        if (strpos($value, ", ") === false) return false;
1784
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1785
     *                        $empty = (empty($surname) || empty($givennames));
1786
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1787
     *                        if ($empty || $notstrings) {
1788
     *                        return false;
1789
     *                        } else {
1790
     *                        return $value;
1791
     *                        }
1792
     *                        }
1793
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1794
     *                        </code>
1795
     *                        </p>
1796
     *
1797
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1798
     */
1799 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // $options is only forwarded when the caller really supplied it, otherwise
        // the native function is left with its own default.
        if (\func_num_args() >= 3) {
            return self::filter(\filter_var($variable, $filter, $options));
        }

        return self::filter(\filter_var($variable, $filter));
    }
1809
1810
    /**
1811
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1812
     *
1813
     * Gets multiple variables and optionally filters them
1814
     *
1815
     * @see http://php.net/manual/en/function.filter-var-array.php
1816
     *
1817
     * @param array $data       <p>
1818
     *                          An array with string keys containing the data to filter.
1819
     *                          </p>
1820
     * @param mixed $definition [optional] <p>
1821
     *                          An array defining the arguments. A valid key is a string
1822
     *                          containing a variable name and a valid value is either a
1823
     *                          filter type, or an
1824
     *                          array optionally specifying the filter, flags and options.
1825
     *                          If the value is an array, valid keys are filter
1826
     *                          which specifies the filter type,
1827
     *                          flags which specifies any flags that apply to the
1828
     *                          filter, and options which specifies any options that
1829
     *                          apply to the filter. See the example below for a better understanding.
1830
     *                          </p>
1831
     *                          <p>
1832
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1833
     *                          input array are filtered by this filter.
1834
     *                          </p>
1835
     * @param bool  $add_empty  [optional] <p>
1836
     *                          Add missing keys as <b>NULL</b> to the return value.
1837
     *                          </p>
1838
     *
1839
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1840
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1841
     *               set
1842
     */
1843 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native defaults apply; as soon as a definition
        // is supplied, both optional arguments are forwarded.
        if (\func_num_args() >= 2) {
            return self::filter(\filter_var_array($data, $definition, $add_empty));
        }

        return self::filter(\filter_var_array($data));
    }
1853
1854
    /**
     * Reports whether the "finfo" class exists on this server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $is_available = \class_exists('finfo');

        return $is_available;
    }
1864
1865
    /**
     * Returns the leading $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string an empty string if the input is empty or $n is not positive
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // nothing to cut for empty input or a non-positive amount
        if ($n <= 0 || $str === '') {
            return '';
        }

        // "UTF-8" goes directly to mbstring, any other charset to the class helper
        $leading = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $leading;
    }
1886
1887
    /**
     * Checks that the string does not hold more unicode characters than the given limit.
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @return bool true if string is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $length = self::strlen($str);

        return $length <= $box_size;
    }
1899
1900
    /**
     * Repairs simple cases of broken UTF-8 via a search-and-replace table.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * Use this function for an UTF-8 string that was converted from Windows-1252 as if it
     * was ISO-8859-1 (ignoring Windows-1252 chars from 80 to 9F).
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        // search / replace lists, filled on first use and reused by later calls
        static $BROKEN_SEQUENCES = null;
        static $FIXED_SEQUENCES = null;

        if ($str === '') {
            return '';
        }

        if ($BROKEN_SEQUENCES === null) {
            // load the "utf8_fix" data only if nobody did so before
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $BROKEN_SEQUENCES = \array_keys(self::$BROKEN_UTF8_FIX);
            $FIXED_SEQUENCES = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($BROKEN_SEQUENCES, $FIXED_SEQUENCES, $str);
    }
1933
1934
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * The string is decoded and re-encoded again and again until a round no longer
     * changes it. Arrays are processed element by element (recursively).
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as &$item) {
                $item = self::fix_utf8($item);
            }
            unset($item);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $str = (string) $str;

        // an empty string needs no round at all
        if ($str !== '') {
            do {
                $previous = $str;
                /**
                 * @psalm-suppress PossiblyInvalidArgument
                 */
                $str = self::to_utf8(
                    self::utf8_decode($str, true)
                );
            } while ($previous !== $str);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
1976
1977
    /**
     * Get the text direction of a specific character.
     *
     * "IntlChar" is asked first when it is flagged as supported; if it is not, or if it
     * reports none of the known direction codes, a built-in table of right-to-left
     * code points decides.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // direction codes from "IntlChar"-Class
            $ltr_codes = [0, 11, 12, 20];
            $rtl_codes = [1, 13, 14, 15, 21];

            if (\in_array($intl_direction, $ltr_codes, true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, $rtl_codes, true)) {
                return 'RTL';
            }

            // any other result: continue with the table lookup
        }

        $code_point = static::chr_to_decimal($char);

        // every entry of the tables below lies inside this window
        if ($code_point < 0x5be || $code_point > 0x10b7f) {
            return 'LTR';
        }

        // isolated right-to-left code points (compared strictly)
        $rtl_single_code_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($code_point, $rtl_single_code_points, true)) {
            return 'RTL';
        }

        // inclusive [first, last] ranges of right-to-left code points
        $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as list($first, $last)) {
            if ($code_point >= $first && $code_point <= $last) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2090
2091
    /**
     * Check for php-support.
     *
     * @param string|null $key <p>Name of a single support-entry, or null for all entries.</p>
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // the transliterator list is only loaded once a single key is requested
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        return self::$SUPPORT;
    }
2115
2116
    /**
     * Guesses the file type from the first two bytes (the "magic number") of the content.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg tiff rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The (start of the) file content.</p>
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'; the $fallback is returned
     *               if the content is shorter than two bytes or the signature is unknown
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // Known signatures, keyed by the two raw leading bytes. The bytes are compared
        // as they are: concatenating their decimal values is ambiguous, e.g. the bytes
        // 7 + 173 and 71 + 73 ("GI") would both end up as "7173".
        static $SIGNATURES = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xFF\xD8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'II'       => ['tiff', 'image/tiff'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        // a signature needs two bytes
        if (\strlen($str) < 2) {
            return $fallback;
        }

        $signature = \substr($str, 0, 2);
        if (!isset($SIGNATURES[$signature])) {
            return $fallback;
        }

        return [
            'ext'  => $SIGNATURES[$signature][0],
            'mime' => $SIGNATURES[$signature][1],
            'type' => 'binary',
        ];
    }
2224
2225
    /**
     * Builds a random string out of characters picked from $possibleChars.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string an empty string if $possibleChars holds no characters
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // "UTF-8" goes directly to mbstring, any other charset to the class helpers
        $use_mbstring = $encoding === 'UTF-8';
        if (!$use_mbstring) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $pool_size = $use_mbstring
            ? (int) \mb_strlen($possibleChars)
            : (int) self::strlen($possibleChars, $encoding);
        if ($pool_size === 0) {
            return '';
        }

        $last_index = $pool_size - 1;
        $random_string = '';
        $picked = 0;

        //
        // add random chars
        //

        while ($picked < $length) {
            try {
                $index = \random_int(0, $last_index);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $last_index);
            }

            $char = $use_mbstring
                ? \mb_substr($possibleChars, $index, 1)
                : self::substr($possibleChars, $index, 1, $encoding);

            // a failed extraction does not count, simply draw again
            if ($char === false) {
                continue;
            }

            $random_string .= $char;
            ++$picked;
        }

        return $random_string;
    }
2286
2287
    /**
     * Builds a unique string from a random number, the session-id, the remote- and
     * server-address (if available) and the given extra entropy, via "uniqid()".
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // "random_int()" throws if no source of randomness is available,
        // so we use the same fallback as in "UTF8::get_random_string()"
        try {
            $randInt = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $randInt = \mt_rand(0, \mt_getrandmax());
        }

        $uniqueHelper = $randInt .
                        \session_id() .
                        ($_SERVER['REMOTE_ADDR'] ?? '') .
                        ($_SERVER['SERVER_ADDR'] ?? '') .
                        $entropyExtra;

        // the helper is used as prefix, "true" adds more entropy at the end of the value
        $uniqueString = \uniqid($uniqueHelper, true);

        if ($md5) {
            // condense the value into a 32 chars hexadecimal string
            $uniqueString = \md5($uniqueString . $uniqueHelper);
        }

        return $uniqueString;
    }
2309
2310
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool the unchanged result of "UTF8::string_has_bom()"
     *
     * @see UTF8::string_has_bom()
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2324
2325
    /**
     * Checks if the string contains at least one lower case char.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains a lower case character
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without mbstring we use the internal pattern matching helper
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2341
2342
    /**
     * Checks if the string contains at least one upper case char.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains an upper case character
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without mbstring we use the internal pattern matching helper
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2358
2359
    /**
     * Converts a hexadecimal-value into a character via "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // hexadecimal string -> decimal number
        $decimal = \hexdec($hexdec);

        return self::decimal_to_chr($decimal);
    }
2370
2371
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted input: 4 to 6 hexadecimal digits, optionally prefixed with "\u" or "U+".
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // only real hexadecimal digits are allowed, otherwise "intval()" would silently
        // stop at the first invalid char (e.g. "zzzz" => 0, "12g4" => 18)
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2395
2396
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $flags    [optional] <p>Passed through to "UTF8::html_entity_decode()".</p>
     * @param string $encoding [optional] <p>Passed through to "UTF8::html_entity_decode()".</p>
     *
     * @return string
     *
     * @see UTF8::html_entity_decode()
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2411
2412
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with "$keepAsciiChars" the encoded range starts behind the ASCII chars
            $startCode = 0x00;
            if ($keepAsciiChars === true) {
                $startCode = 0x80;
            }

            // a convmap entry has exactly four elements: [start, end, offset, mask],
            // PHP >= 8.0 throws a ValueError if the size isn't a multiple of 4
            $convmap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convmap);
            }

            return \mb_encode_numericentity($str, $convmap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2468
2469
    /**
2470
     * UTF-8 version of html_entity_decode()
2471
     *
2472
     * The reason we are not using html_entity_decode() by itself is because
2473
     * while it is not technically correct to leave out the semicolon
2474
     * at the end of an entity most browsers will still interpret the entity
2475
     * correctly. html_entity_decode() does not convert entities without
2476
     * semicolons, so we are left with our own little solution here. Bummer.
2477
     *
2478
     * Convert all HTML entities to their applicable characters
2479
     *
2480
     * INFO: opposite to UTF8::html_encode()
2481
     *
2482
     * @see http://php.net/manual/en/function.html-entity-decode.php
2483
     *
2484
     * @param string $str      <p>
2485
     *                         The input string.
2486
     *                         </p>
2487
     * @param int    $flags    [optional] <p>
2488
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2489
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2490
     *                         <table>
2491
     *                         Available <i>flags</i> constants
2492
     *                         <tr valign="top">
2493
     *                         <td>Constant Name</td>
2494
     *                         <td>Description</td>
2495
     *                         </tr>
2496
     *                         <tr valign="top">
2497
     *                         <td><b>ENT_COMPAT</b></td>
2498
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2499
     *                         </tr>
2500
     *                         <tr valign="top">
2501
     *                         <td><b>ENT_QUOTES</b></td>
2502
     *                         <td>Will convert both double and single quotes.</td>
2503
     *                         </tr>
2504
     *                         <tr valign="top">
2505
     *                         <td><b>ENT_NOQUOTES</b></td>
2506
     *                         <td>Will leave both double and single quotes unconverted.</td>
2507
     *                         </tr>
2508
     *                         <tr valign="top">
2509
     *                         <td><b>ENT_HTML401</b></td>
2510
     *                         <td>
2511
     *                         Handle code as HTML 4.01.
2512
     *                         </td>
2513
     *                         </tr>
2514
     *                         <tr valign="top">
2515
     *                         <td><b>ENT_XML1</b></td>
2516
     *                         <td>
2517
     *                         Handle code as XML 1.
2518
     *                         </td>
2519
     *                         </tr>
2520
     *                         <tr valign="top">
2521
     *                         <td><b>ENT_XHTML</b></td>
2522
     *                         <td>
2523
     *                         Handle code as XHTML.
2524
     *                         </td>
2525
     *                         </tr>
2526
     *                         <tr valign="top">
2527
     *                         <td><b>ENT_HTML5</b></td>
2528
     *                         <td>
2529
     *                         Handle code as HTML 5.
2530
     *                         </td>
2531
     *                         </tr>
2532
     *                         </table>
2533
     *                         </p>
2534
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2535
     *
2536
     * @return string the decoded string
2537
     */
2538 46
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // fast path: the input is too short for an entity or has no "&" at all
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // a convmap entry has exactly four elements: [start, end, offset, mask],
        // PHP >= 8.0 throws a ValueError if the size isn't a multiple of 4
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        // decode again and again, until the string does not change anymore
        // (so that also multiple encoded entities are decoded)
        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    $str = \mb_decode_numericentity($str, $convmap);
                } else {
                    $str = \mb_decode_numericentity($str, $convmap, $encoding);
                }
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // quotes are kept as entities in this step
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities
                    // (add the missing semicolon, so that "html_entity_decode()" can handle them)
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2625
2626
    /**
     * Escapes the string for html output via "UTF8::htmlspecialchars()",
     * with the flags "ENT_QUOTES | ENT_SUBSTITUTE".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // convert both quote styles && replace invalid code unit sequences
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2642
2643
    /**
     * Removes html-tags without content (or only with whitespace between the tags).
     *
     * e.g.: <tag></tag>
     *
     * INFO: the replacement runs only once, so an outer tag that becomes empty
     *       by this call is not removed.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // opening tag + optional whitespace + closing tag
        $emptyTagPattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($emptyTagPattern, '', $str);

        return (string) $stripped;
    }
2660
2661
    /**
2662
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2663
     *
2664
     * @see http://php.net/manual/en/function.htmlentities.php
2665
     *
2666
     * @param string $str           <p>
2667
     *                              The input string.
2668
     *                              </p>
2669
     * @param int    $flags         [optional] <p>
2670
     *                              A bitmask of one or more of the following flags, which specify how to handle
2671
     *                              quotes, invalid code unit sequences and the used document type. The default is
2672
     *                              ENT_COMPAT | ENT_HTML401.
2673
     *                              <table>
2674
     *                              Available <i>flags</i> constants
2675
     *                              <tr valign="top">
2676
     *                              <td>Constant Name</td>
2677
     *                              <td>Description</td>
2678
     *                              </tr>
2679
     *                              <tr valign="top">
2680
     *                              <td><b>ENT_COMPAT</b></td>
2681
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2682
     *                              </tr>
2683
     *                              <tr valign="top">
2684
     *                              <td><b>ENT_QUOTES</b></td>
2685
     *                              <td>Will convert both double and single quotes.</td>
2686
     *                              </tr>
2687
     *                              <tr valign="top">
2688
     *                              <td><b>ENT_NOQUOTES</b></td>
2689
     *                              <td>Will leave both double and single quotes unconverted.</td>
2690
     *                              </tr>
2691
     *                              <tr valign="top">
2692
     *                              <td><b>ENT_IGNORE</b></td>
2693
     *                              <td>
2694
     *                              Silently discard invalid code unit sequences instead of returning
2695
     *                              an empty string. Using this flag is discouraged as it
2696
     *                              may have security implications.
2697
     *                              </td>
2698
     *                              </tr>
2699
     *                              <tr valign="top">
2700
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2701
     *                              <td>
2702
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2703
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2704
     *                              string.
2705
     *                              </td>
2706
     *                              </tr>
2707
     *                              <tr valign="top">
2708
     *                              <td><b>ENT_DISALLOWED</b></td>
2709
     *                              <td>
2710
     *                              Replace invalid code points for the given document type with a
2711
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2712
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2713
     *                              instance, to ensure the well-formedness of XML documents with
2714
     *                              embedded external content.
2715
     *                              </td>
2716
     *                              </tr>
2717
     *                              <tr valign="top">
2718
     *                              <td><b>ENT_HTML401</b></td>
2719
     *                              <td>
2720
     *                              Handle code as HTML 4.01.
2721
     *                              </td>
2722
     *                              </tr>
2723
     *                              <tr valign="top">
2724
     *                              <td><b>ENT_XML1</b></td>
2725
     *                              <td>
2726
     *                              Handle code as XML 1.
2727
     *                              </td>
2728
     *                              </tr>
2729
     *                              <tr valign="top">
2730
     *                              <td><b>ENT_XHTML</b></td>
2731
     *                              <td>
2732
     *                              Handle code as XHTML.
2733
     *                              </td>
2734
     *                              </tr>
2735
     *                              <tr valign="top">
2736
     *                              <td><b>ENT_HTML5</b></td>
2737
     *                              <td>
2738
     *                              Handle code as HTML 5.
2739
     *                              </td>
2740
     *                              </tr>
2741
     *                              </table>
2742
     *                              </p>
2743
     * @param string $encoding      [optional] <p>
2744
     *                              Like <b>htmlspecialchars</b>,
2745
     *                              <b>htmlentities</b> takes an optional third argument
2746
     *                              <i>encoding</i> which defines encoding used in
2747
     *                              conversion.
2748
     *                              Although this argument is technically optional, you are highly
2749
     *                              encouraged to specify the correct value for your code.
2750
     *                              </p>
2751
     * @param bool   $double_encode [optional] <p>
2752
     *                              When <i>double_encode</i> is turned off PHP will not
2753
     *                              encode existing html entities. The default is to convert everything.
2754
     *                              </p>
2755
     *
2756
     * @return string
2757
     *                <p>
2758
     *                The encoded string.
2759
     *                <br><br>
2760
     *                If the input <i>string</i> contains an invalid code unit
2761
     *                sequence within the given <i>encoding</i> an empty string
2762
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2763
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2764
     *                </p>
2765
     */
2766 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as they are, everything else is normalized first
        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * PHP doesn't replace a backslash with its html entity, so we do
         * this here via the numeric entity of the backslash.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // the remaining non-ASCII chars are converted into numbered entities
        return self::html_encode($encoded, true, $encoding);
    }
2790
2791
    /**
2792
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2793
     *
2794
     * INFO: Take a look at "UTF8::htmlentities()"
2795
     *
2796
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2797
     *
2798
     * @param string $str           <p>
2799
     *                              The string being converted.
2800
     *                              </p>
2801
     * @param int    $flags         [optional] <p>
2802
     *                              A bitmask of one or more of the following flags, which specify how to handle
2803
     *                              quotes, invalid code unit sequences and the used document type. The default is
2804
     *                              ENT_COMPAT | ENT_HTML401.
2805
     *                              <table>
2806
     *                              Available <i>flags</i> constants
2807
     *                              <tr valign="top">
2808
     *                              <td>Constant Name</td>
2809
     *                              <td>Description</td>
2810
     *                              </tr>
2811
     *                              <tr valign="top">
2812
     *                              <td><b>ENT_COMPAT</b></td>
2813
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2814
     *                              </tr>
2815
     *                              <tr valign="top">
2816
     *                              <td><b>ENT_QUOTES</b></td>
2817
     *                              <td>Will convert both double and single quotes.</td>
2818
     *                              </tr>
2819
     *                              <tr valign="top">
2820
     *                              <td><b>ENT_NOQUOTES</b></td>
2821
     *                              <td>Will leave both double and single quotes unconverted.</td>
2822
     *                              </tr>
2823
     *                              <tr valign="top">
2824
     *                              <td><b>ENT_IGNORE</b></td>
2825
     *                              <td>
2826
     *                              Silently discard invalid code unit sequences instead of returning
2827
     *                              an empty string. Using this flag is discouraged as it
2828
     *                              may have security implications.
2829
     *                              </td>
2830
     *                              </tr>
2831
     *                              <tr valign="top">
2832
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2833
     *                              <td>
2834
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2835
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2836
     *                              string.
2837
     *                              </td>
2838
     *                              </tr>
2839
     *                              <tr valign="top">
2840
     *                              <td><b>ENT_DISALLOWED</b></td>
2841
     *                              <td>
2842
     *                              Replace invalid code points for the given document type with a
2843
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2844
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2845
     *                              instance, to ensure the well-formedness of XML documents with
2846
     *                              embedded external content.
2847
     *                              </td>
2848
     *                              </tr>
2849
     *                              <tr valign="top">
2850
     *                              <td><b>ENT_HTML401</b></td>
2851
     *                              <td>
2852
     *                              Handle code as HTML 4.01.
2853
     *                              </td>
2854
     *                              </tr>
2855
     *                              <tr valign="top">
2856
     *                              <td><b>ENT_XML1</b></td>
2857
     *                              <td>
2858
     *                              Handle code as XML 1.
2859
     *                              </td>
2860
     *                              </tr>
2861
     *                              <tr valign="top">
2862
     *                              <td><b>ENT_XHTML</b></td>
2863
     *                              <td>
2864
     *                              Handle code as XHTML.
2865
     *                              </td>
2866
     *                              </tr>
2867
     *                              <tr valign="top">
2868
     *                              <td><b>ENT_HTML5</b></td>
2869
     *                              <td>
2870
     *                              Handle code as HTML 5.
2871
     *                              </td>
2872
     *                              </tr>
2873
     *                              </table>
2874
     *                              </p>
2875
     * @param string $encoding      [optional] <p>
2876
     *                              Defines encoding used in conversion.
2877
     *                              </p>
2878
     *                              <p>
2879
     *                              For the purposes of this function, the encodings
2880
     *                              ISO-8859-1, ISO-8859-15,
2881
     *                              UTF-8, cp866,
2882
     *                              cp1251, cp1252, and
2883
     *                              KOI8-R are effectively equivalent, provided the
2884
     *                              <i>string</i> itself is valid for the encoding, as
2885
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2886
     *                              the same positions in all of these encodings.
2887
     *                              </p>
2888
     * @param bool   $double_encode [optional] <p>
2889
     *                              When <i>double_encode</i> is turned off PHP will not
2890
     *                              encode existing html entities, the default is to convert everything.
2891
     *                              </p>
2892
     *
2893
     * @return string the converted string.
2894
     *                </p>
2895
     *                <p>
2896
     *                If the input <i>string</i> contains an invalid code unit
2897
     *                sequence within the given <i>encoding</i> an empty string
2898
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2899
     *                <b>ENT_SUBSTITUTE</b> flags are set
2900
     */
2901 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as they are, everything else is normalized first
        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2913
2914
    /**
     * Checks whether the "iconv"-extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2924
2925
    /**
     * alias for "UTF8::decimal_to_chr()"
     *
     * @param mixed $int <p>Passed through to "UTF8::decimal_to_chr()".</p>
     *
     * @return string
     *
     * @see UTF8::decimal_to_chr()
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2938
2939
    /**
     * Converts an integer to its hexadecimal "U+xxxx" code point notation.
     *
     * The hex digits come from dechex() (lower case) and are left-padded
     * with zeros to at least four digits; longer values are kept as they are.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $pfix [optional] <p>Prefix put in front of the hex digits.</p>
     *
     * @return string the prefix followed by the (zero-padded) hex digits
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        $digits = \dechex($int);

        // pad short values: "41" => "0041"
        if (\strlen($digits) < 4) {
            $digits = \substr('0000' . $digits, -4);
        }

        return $pfix . $digits;
    }
2957
2958
    /**
     * Reports whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $hasIntlChar = \class_exists('IntlChar');

        return $hasIntlChar;
    }
2968
2969
    /**
     * Reports whether the "intl" PHP extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $hasIntl = \extension_loaded('intl');

        return $hasIntl;
    }
2979
2980
    /**
     * Alias for "UTF8::is_ascii()": forwards the string and returns its verdict.
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_ascii()
     * @deprecated <p>use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $result = self::is_ascii($str);

        return $result;
    }
2994
2995
    /**
     * Alias for "UTF8::is_base64()": forwards the input and returns its verdict.
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_base64()
     * @deprecated <p>use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $result = self::is_base64($str);

        return $result;
    }
3009
3010
    /**
     * Alias for "UTF8::is_binary()": forwards both arguments and returns its verdict.
     *
     * @param mixed $str
     * @param bool  $strict
     *
     * @return bool
     *
     * @see UTF8::is_binary()
     * @deprecated <p>use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $result = self::is_binary($str, $strict);

        return $result;
    }
3025
3026
    /**
     * Alias for "UTF8::is_bom()": forwards the string and returns its verdict.
     *
     * @param string $utf8_chr
     *
     * @return bool
     *
     * @see UTF8::is_bom()
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $result = self::is_bom($utf8_chr);

        return $result;
    }
3040
3041
    /**
     * Alias for "UTF8::is_html()": forwards the string and returns its verdict.
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_html()
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $result = self::is_html($str);

        return $result;
    }
3055
3056
    /**
     * Alias for "UTF8::is_json()": forwards the string (leaving the second
     * parameter of is_json() at its default) and returns its verdict.
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_json()
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $result = self::is_json($str);

        return $result;
    }
3070
3071
    /**
     * Alias for "UTF8::is_utf16()": forwards the input (leaving the second
     * parameter of is_utf16() at its default) and returns its result.
     *
     * @param mixed $str
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see UTF8::is_utf16()
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
3088
3089
    /**
     * Alias for "UTF8::is_utf32()": forwards the input (leaving the second
     * parameter of is_utf32() at its default) and returns its result.
     *
     * @param mixed $str
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see UTF8::is_utf32()
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
3106
3107
    /**
     * Alias for "UTF8::is_utf8()": forwards both arguments and returns its verdict.
     *
     * @param string $str
     * @param bool   $strict
     *
     * @return bool
     *
     * @see UTF8::is_utf8()
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3122
3123
    /**
     * Tests the string against the POSIX class pattern "^[[:alpha:]]*$".
     *
     * Uses mb_ereg_match() when mbstring support is flagged in self::$SUPPORT,
     * otherwise delegates to self::str_matches_pattern().
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3140
3141
    /**
     * Tests the string against the POSIX class pattern "^[[:alnum:]]*$".
     *
     * Uses mb_ereg_match() when mbstring support is flagged in self::$SUPPORT,
     * otherwise delegates to self::str_matches_pattern().
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3158
3159
    /**
     * Checks if a string consists only of the accepted 7 bit ASCII bytes.
     *
     * Accepted are the printable range 0x20-0x7E plus the bytes 0x09, 0x0A,
     * 0x0D, 0x10 and 0x13. Any other byte (e.g. NUL, DEL or anything >= 0x80)
     * makes the check fail. An empty string counts as ASCII.
     *
     * NOTE(review): 0x10 and 0x13 look like the decimal values of LF (10) and
     * CR (13) written as hex; both are already covered by 0x0A / 0x0D.
     * Confirm whether DLE / DC3 are meant to be accepted.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        return $str === ''
               ||
               !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }
3176
3177
    /**
     * Returns true if the input is a canonically base64 encoded string.
     *
     * The input must be a string that strictly decodes via base64_decode()
     * and re-encodes to exactly the same text.
     *
     * @param mixed|string $str                <p>The input string.</p>
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round trip: only canonical base64 survives decode + encode unchanged
        return \base64_encode($decoded) === $str;
    }
3202
3203
    /**
     * Heuristically checks whether the input looks like binary data.
     *
     * After casting the input to string, the checks run in this order:
     * an empty string is never binary; a string matching "~^[01]+$~" is
     * binary; a "binary" type reported by self::get_file_type() is binary;
     * more than 25% NUL bytes is binary. With $strict enabled, the
     * mime-encoding reported by finfo must finally be "binary".
     *
     * @param mixed $input
     * @param bool  $strict <p>Also consult ext-fileinfo when the cheaper checks did not match.</p>
     *
     * @throws \RuntimeException if $strict is used while self::$SUPPORT['finfo'] is false
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // a "0101..." bit string
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $fileType = self::get_file_type($input);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // ratio of NUL bytes over the whole input
        $byteLength = \strlen($input);
        $nullByteCount = \substr_count($input, "\x0", 0, $byteLength);
        if (($nullByteCount / $byteLength) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $mimeEncoding === 'binary';
    }
3247
3248
    /**
     * Checks if a file is binary by inspecting up to its first 512 bytes
     * with self::is_binary() in strict mode.
     *
     * If the file cannot be opened, or nothing could be read, false is returned.
     *
     * @param string $file <p>Path handed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $head = '';

        $handle = \fopen($file, 'rb');
        if (\is_resource($handle)) {
            $head = \fread($handle, 512);
            \fclose($handle);
        }

        return $head !== '' && self::is_binary($head, true);
    }
3272
3273
    /**
     * Tests the string against the POSIX class pattern "^[[:space:]]*$".
     *
     * Uses mb_ereg_match() when mbstring support is flagged in self::$SUPPORT,
     * otherwise delegates to self::str_matches_pattern().
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3290
3291
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The known marks are the keys of self::$BOM; the input has to match one
     * of them exactly (a string that merely starts with a BOM does not count).
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // Plain key lookup instead of a by-reference loop over the shared
        // static table. The is_string() guard keeps non-string input at
        // "false", as the former strict comparison did.
        return \is_string($str) && \array_key_exists($str, self::$BOM);
    }
3312
3313
    /**
     * Determine whether the value is considered to be empty.
     *
     * The value is empty when it is loosely equal to FALSE
     * (e.g. '', '0', 0, null, false or an empty array).
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // for an always-defined parameter, empty($str) is the same as !$str
        return !$str;
    }
3327
3328
    /**
     * Tests the string against the POSIX class pattern "^[[:xdigit:]]*$".
     *
     * Uses mb_ereg_match() when mbstring support is flagged in self::$SUPPORT,
     * otherwise delegates to self::str_matches_pattern().
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3345
3346
    /**
     * Check if the string contains at least one html-tag, e.g. "<lall>".
     *
     * An empty string never contains html. The string is passed through
     * self::emoji_encode() before the tag pattern is applied.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $encoded = self::emoji_encode($str); // hack for emoji support :/

        $tagMatches = [];
        \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded, $tagMatches);

        return $tagMatches !== [];
    }
3368
3369
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded via self::json_decode(). A NULL result is only
     * accepted for the literal "null" (compared case-insensitively), and the
     * final verdict is that json_last_error() reports no error.
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if self::$SUPPORT['json'] is false
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // NULL means "decoding failed" unless the input itself was "null"
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        $isContainer = \is_object($decoded) || \is_array($decoded);
        if ($onlyArrayOrObjectResultsAreValid === true && !$isContainer) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3405
3406
    /**
     * Tests the string against the POSIX class pattern "^[[:lower:]]*$".
     *
     * Uses mb_ereg_match() when mbstring support is flagged in self::$SUPPORT,
     * otherwise delegates to self::str_matches_pattern().
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3420
3421
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check really unserializes the string. To keep that safe for
     * untrusted input, object instantiation is disabled ("allowed_classes"
     * => false), so no class code such as __wakeup() / __destruct() runs
     * as a side effect of the check. Serialized objects are still detected.
     *
     * @param string $str
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // serialized FALSE cannot be told apart from a failed unserialize()
        // by the return value, hence the explicit check
        if ($str === 'b:0;') {
            return true;
        }

        // "@": unserialize() emits a notice for non-serialized input
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3440
3441
    /**
     * Tests the string against the POSIX class pattern "^[[:upper:]]*$",
     * i.e. returns true if the string contains only upper case chars.
     *
     * Uses mb_ereg_match() when mbstring support is flagged in self::$SUPPORT,
     * otherwise delegates to self::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3459
3460
    /**
     * Check if the string is UTF-16.
     *
     * Heuristic: after removing a BOM, the string is converted from each
     * candidate byte order (LE, then BE) to UTF-8. If that conversion
     * survives a round trip unchanged, the candidate collects one point per
     * distinct character of the converted text that in_array() finds in the
     * character statistics of the input. The byte order with more points
     * wins; equal points mean "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>Return false right away if self::is_binary() (strict) rejects the input.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $score = [];
        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $score[$encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // only a lossless round trip qualifies the candidate
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Iterate by value: the result of a function call is a temporary
            // and cannot serve as a reference target.
            // NOTE(review): the keys of count_chars($test3) are looked up
            // among the *values* of $strChars - confirm that this is intended.
            foreach (self::count_chars($test3) as $test3char => $unusedValue) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$score[$encoding];
                }
            }
        }

        if ($score['UTF-16BE'] === $score['UTF-16LE']) {
            return false;
        }

        return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
    }
3537
3538
    /**
     * Check if the string is UTF-32.
     *
     * Heuristic: after removing a BOM, the string is converted from each
     * candidate byte order (LE, then BE) to UTF-8. If that conversion
     * survives a round trip unchanged, the candidate collects one point per
     * distinct character of the converted text that in_array() finds in the
     * character statistics of the input. The byte order with more points
     * wins; equal points mean "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>Return false right away if self::is_binary() (strict) rejects the input.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $score = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $score[$encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // only a lossless round trip qualifies the candidate
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Iterate by value: the result of a function call is a temporary
            // and cannot serve as a reference target.
            // NOTE(review): the keys of count_chars($test3) are looked up
            // among the *values* of $strChars - confirm that this is intended.
            foreach (self::count_chars($test3) as $test3char => $unusedValue) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$score[$encoding];
                }
            }
        }

        if ($score['UTF-32BE'] === $score['UTF-32LE']) {
            return false;
        }

        return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
    }
3615
3616
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * When PCRE was built with UTF-8 support the check is delegated to it,
     * otherwise a hand-written validator walks the string byte by byte.
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked. For an array, every element must be valid.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // Nothing is modified here, so iterate by value instead of by reference.
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // Work on a real string from here on (the parameter is untyped).
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // The "/u" shortcut is only usable when PCRE really supports UTF-8;
        // without that support the pattern cannot match and every string
        // would be reported as invalid.
        if (self::pcre_utf8_support() === true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $tmp = $in;
                $tmp = ($tmp & 0x0000003F) << $shift;
                $mUcs4 |= $tmp;
                // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point to be output.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }
                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        return true;
    }
3771
3772
    /**
     * Decodes a JSON string after passing it through UTF8::filter().
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>Bitmask of JSON decode options, handed to the native function as-is.</p>
     *
     * @throws \RuntimeException if the support table reports that ext-json is missing
     *
     * @return mixed
     *               Whatever the native <i>json_decode()</i> returns for the filtered input
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // The input is filtered first, exactly as before, even if decoding is not possible afterwards.
        $filteredJson = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filteredJson, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3822
3823
    /**
     * Returns the JSON representation of a value after passing it through UTF8::filter().
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded.</p>
     * @param int   $options [optional] <p>Bitmask of JSON encode options, handed to the native function as-is.</p>
     * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
     *
     * @throws \RuntimeException if the support table reports that ext-json is missing
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // The value is filtered first, exactly as before, even if encoding is not possible afterwards.
        $filteredValue = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filteredValue, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3872
3873
    /**
     * Checks whether JSON is available on the server.
     *
     * The check looks for the native "json_decode" function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $hasJsonDecode = \function_exists('json_decode');

        return $hasJsonDecode;
    }
3883
3884
    /**
     * Makes string's first char lowercase.
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $head = (string) \mb_substr($str, 0, 1);
            $tail = (string) \mb_substr($str, 1);

            // Without language or length constraints the native mbstring call is enough.
            if ($lang === null && $tryToKeepStringLength === false) {
                return \mb_strtolower($head) . $tail;
            }

            return self::strtolower($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
        }

        // Any other charset goes through the encoding-aware helpers of this class.
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $head = (string) self::substr($str, 0, 1, $encoding);
        $tail = (string) self::substr($str, 1, null, $encoding);

        return self::strtolower($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
    }
3940
3941
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $result;
    }
3963
3964
    /**
     * Lowercase for all words in the string.
     *
     * The string is split via UTF8::str_to_words(), the first char of every
     * word that is not listed in $exceptions is lowercased via UTF8::lcfirst(),
     * and the parts are glued together again.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words.</p>
     * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against the empty string explicitly: a loose check would
        // also swallow the valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        // Write back by key instead of iterating by reference, so no dangling
        // reference is left behind after the loop.
        foreach ($words as $index => $word) {
            // Falsy parts (e.g. "" or "0") are left untouched.
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }

        return \implode('', $words);
    }
4010
4011
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $result;
    }
4033
4034
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or an empty string means
     *                           "strip whitespace". The single char "0" is a valid char list.</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Test for "no chars given" explicitly: a truthiness check would treat
        // the char list "0" as empty and silently trim whitespace instead.
        if ($chars !== null && $chars !== '') {
            $pattern = '^[' . \preg_quote($chars, '/') . ']+';
        } else {
            $pattern = '^[\\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4062
4063
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into a single string before the
     * code points are collected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null when no code points were found
     */
    public static function max($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\max($codepoints));
    }
4085
4086
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int the largest entry of UTF8::chr_size_list(), or 0 when that list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return \count($sizes) > 0 ? (int) \max($sizes) : 0;
    }
4103
4104
    /**
     * Checks whether mbstring is available on the server.
     *
     * The check asks PHP whether the "mbstring" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4114
4115
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into a single string before the
     * code points are collected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null when no code points were found
     */
    public static function min($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\min($codepoints));
    }
4137
4138
    /**
     * alias for "UTF8::normalize_encoding()"
     *
     * Both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4153
4154
    /**
     * Normalize the encoding-"name" input.
     *
     * Well-known names are answered directly, names found in the list of
     * encodings are returned unchanged, and everything else is upper-cased,
     * stripped of non-alphanumeric chars and looked up in an alias table.
     * Results of the slow path are remembered in a static cache.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // empty (or "0") input: nothing to normalize
        if (!$encoding) {
            return $fallback;
        }

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($encoding, ['1', '0'], true)) {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of encodings
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // a name from the list is returned (and cached) unchanged
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $cacheKey = $encoding;
        $encoding = \strtoupper($encoding);
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5',
            'ISO88596'    => 'ISO-8859-6', // Greek
            'ISO88597'    => 'ISO-8859-7',
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // a known alias wins, otherwise the upper-cased input is kept
        $alias = $aliases[$aliasKey] ?? null;
        if ($alias !== null) {
            $encoding = $alias;
        }

        $STATIC_NORMALIZE_ENCODING_CACHE[$cacheKey] = $encoding;

        return $encoding;
    }
4297
4298
    /**
     * Standardize line ending to unix-like.
     *
     * "\r\n" pairs are replaced first, then any remaining lone "\r".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        $foreignLineEndings = ["\r\n", "\r"];

        return \str_replace($foreignLineEndings, "\n", $str);
    }
4309
4310
    /**
     * Normalize some MS Word special characters.
     *
     * Typographic quotes, dashes and the ellipsis are replaced by their
     * plain ASCII look-alikes.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
            "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
            "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
            "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
            "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
            "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
            "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
            "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
            "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
            "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
            "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
            "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
            "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
            "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
            "\xe2\x80\xa6" => '...', // … (U+2026) in UTF-8
        ];

        return \str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );
    }
4361
4362
    /**
     * Normalize the whitespace.
     *
     * Every char from the whitespace table is replaced by a plain space;
     * unless requested otherwise, the bidirectional control chars are removed first.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        if ($str === '') {
            return '';
        }

        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        // one cached search list per "keep non-breaking space" setting
        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $whitespaceTable = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($whitespaceTable['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            // the control chars are removed, not replaced by a space
            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4403
4404
    /**
     * Calculates the Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: per-call cache, the "ord" data table, IntlChar (when
     * flagged as supported) and finally manual decoding of the leading bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the cache is keyed by the raw input plus the (normalized) encoding
        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey])) {
            return $CHAR_CACHE[$cacheKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];

            return $CHAR_CACHE[$cacheKey];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            if ($intlCode) {
                $CHAR_CACHE[$cacheKey] = $intlCode;

                return $CHAR_CACHE[$cacheKey];
            }
        }

        //
        // fallback via vanilla php: decode at most the first four bytes by hand
        //

        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        // unpack() numbers its result starting at 1
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $codePoint = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $codePoint = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $codePoint = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing to decode)
            $codePoint = $lead;
        }

        return $CHAR_CACHE[$cacheKey] = $codePoint;
    }
4489
4490
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike the native "parse_str()", this method never creates variables in the
     *          current scope; the result is always written to the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // without mbstring: use the native parser and judge success by the result only
        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);
        if ($parsed === false) {
            return false;
        }

        return $result !== [];
    }
4522
4523
    /**
     * Checks if the "u" pattern modifier (Unicode support in PCRE) is available.
     *
     * The check compiles and runs an empty pattern carrying the modifier.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // errors are silenced on purpose: an unsupported modifier must yield "false", not a warning
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe = @\preg_match('//u', '');

        return (bool) $probe;
    }
4535
4536
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Both bounds are resolved to code points: as decimal numbers, as hexadecimal
     * values (via self::hex_to_int()) or, as a last resort, as characters (via self::ord()).
     * An empty array is returned when a bound is falsy (e.g. 0, '0', '') or resolves to 0.
     *
     * @param mixed  $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed  $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool   $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \RuntimeException if $use_ctype is true but "ext-ctype" is flagged as unavailable
     *
     * @return string[]
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8'
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        // ext-ctype is only a requirement when the caller asked for ctype detection
        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        if ($use_ctype) {
            // ctype functions must receive strings: integers are deprecated input
            // and small integers would be interpreted as ASCII codes
            $var1AsString = (string) $var1;
            $var2AsString = (string) $var2;

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit($var1AsString) && \ctype_digit($var2AsString)) {
                $is_digit = true;
            } elseif (\ctype_xdigit($var1AsString) && \ctype_xdigit($var2AsString)) {
                $is_xdigit = true;
            }
        }

        if ($is_digit) {
            $start = (int) $var1;
        } elseif ($is_xdigit) {
            $start = (int) self::hex_to_int($var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1);
        }

        if (!$start) {
            return [];
        }

        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2);
        }

        if (!$end) {
            return [];
        }

        return \array_map(
            static function (int $i) use ($encoding): string {
                return (string) self::chr($i, $encoding);
            },
            \range($start, $end)
        );
    }
4601
4602
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // decoding is only attempted when at least one of these markers is present
        $needsDecoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needsDecoding = true;

                break;
            }
        }

        if (!$needsDecoding) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // repeat the decode chain until the string stops changing (or once, without multi-decode)
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entityDecoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\rawurldecode($entityDecoded));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
4659
4660
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in the delimiter and always gets the "u" modifier,
     * followed by the given options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the option string "msr" is mapped to "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty delimiter
        $wrapper = $delimiter ?: '/';

        $regex = $wrapper . $pattern . $wrapper . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4693
4694
    /**
     * Deprecated alias: forwards to "UTF8::remove_bom()".
     *
     * @param string $str
     *
     * @return string
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4708
4709
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table (byte sequence => byte length) is tested
     * against the start of the string; a matching sequence is cut off.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = \strlen($str);

        foreach (self::$BOM as $bomString => $bomByteLength) {
            // only a BOM at the very beginning of the string counts
            if (\strpos($str, $bomString, 0) !== 0) {
                continue;
            }

            $stripped = \substr($str, $bomByteLength, $remainingBytes);
            if ($stripped === false) {
                return '';
            }

            $remainingBytes -= (int) $bomByteLength;
            $str = (string) $stripped;
        }

        return $str;
    }
4738
4739
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of two or more consecutive occurrences of an item is collapsed
     * into a single occurrence. Empty items are ignored. If the regex engine
     * fails for an item (e.g. because $str is not valid UTF-8), the string is
     * left untouched for that item instead of being emptied.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // anything that is neither a string nor an array leaves the input unchanged
        if (\is_array($what) === false) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($what as $item) {
            $item = (string) $item;
            if ($item === '') {
                continue;
            }

            // a callback is used so that "$0", "\1", ... inside $item are not
            // interpreted as back-references of the replacement template
            $collapsed = \preg_replace_callback(
                '/(?:' . \preg_quote($item, '/') . ')+/u',
                static function () use ($item): string {
                    return $item;
                },
                $str
            );

            // null signals a PCRE error: keep the current string
            if ($collapsed !== null) {
                $str = $collapsed;
            }
        }

        return $str;
    }
4762
4763
    /**
     * Strip HTML (and PHP) tags from the string via "strip_tags()".
     *
     * @param string $str
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
     *                              Default: '' (strip everything)
     *                              </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4777
4778
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced once. Only real
     * "br" tags (any case, with or without attributes / closing slash) are
     * matched; other tags whose name merely starts with "br" are kept.
     *
     * @param string $str
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" must come first so the pair is consumed as a single match;
        // "\b" after "br" stops the tag name there, "[^>]*" runs to the closing ">"
        return (string) \preg_replace("#\r\n|\r|\n|<br\\b[^>]*>#i", $replacement, $str);
    }
4790
4791
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09) are kept.
     * The replacement is repeated until no more match is found.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str
     * @param bool   $url_encoded <p>Also remove the url-encoded form (%00 ...) of the characters.</p>
     * @param string $replacement <p>The string which replaces every match.</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        $patterns = [];

        if ($url_encoded) {
            // url encoded 00-08, 11, 12, 14, 15
            $patterns[] = '/%0[0-8bcefBCEF]/';
            // url encoded 16-31
            $patterns[] = '/%1[0-9a-fA-F]/';
        }

        // raw bytes 00-08, 11, 12, 14-31, 127
        $patterns[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S';

        // a replacement can expose a new match (e.g. "%%0000"), hence the loop
        $replaced = 0;
        do {
            $str = (string) \preg_replace($patterns, $replacement, $str, -1, $replaced);
        } while ($replaced !== 0);

        return $str;
    }
4824
4825
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix is detected with a byte-wise comparison; any non-empty
     * prefix is honoured, including the string "0".
     *
     * @param string $str
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // explicit empty-check: a truthiness test would wrongly skip the prefix "0"
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4856
4857
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix is detected with a byte-wise comparison; any non-empty
     * suffix is honoured, including the string "0".
     *
     * @param string $str
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // explicit empty-check: a truthiness test would wrongly skip the suffix "0"
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4889
4890
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        // native replace for exact matching, the class' own helper for case-insensitive matching
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4912
4913
    /**
     * Replaces all occurrences of every element of $search in $str by $replacement.
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string (or the per-element strings) to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        if (!$caseSensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4935
4936
    /**
     * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
     *
     * With $processInvalidUtf8 enabled, invalid byte sequences are first turned
     * into U+FFFD by mbstring; afterwards every U+FFFD is replaced by
     * $replacementChar (or removed, when the replacement is an empty string).
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character.</p>
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            $save = \mb_substitute_character();

            try {
                // mb_substitute_character() accepts only a code point or "none" / "long" / "entity",
                // never an arbitrary string: mark invalid bytes with U+FFFD here and let the
                // str_replace() below insert the real replacement
                \mb_substitute_character(0xFFFD);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the previous setting, even if the conversion throws
                \mb_substitute_character($save);
            }
        }

        return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
    }
4982
4983
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * Whitespace is stripped when $chars is null or an empty string; any other
     * value (including "0") is used as the list of characters to strip.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // explicit checks: a truthiness test would wrongly ignore the character list "0"
        if ($chars !== null && $chars !== '') {
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
5011
5012
    /**
     * WARNING: Prints (echo) the native UTF-8 support (libs), e.g. for debugging.
     *
     * Output: one "key - value" line per entry of the support table, wrapped in a "pre" tag.
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        $lines = '';
        foreach (self::$SUPPORT as $key => $value) {
            $lines .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        echo '<pre>' . $lines . '</pre>';
    }
5026
5027
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity (an empty string for empty input)
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched when the caller asked for it
        $keepUnchanged = $keepAsciiChars === true && self::is_ascii($char) === true;
        if ($keepUnchanged) {
            return $char;
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
5052
5053
    /**
     * Replace every run of $tabLength consecutive spaces with one tab character.
     *
     * @param string $str
     * @param int    $tabLength <p>Number of spaces which make up one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace($spaces, "\t", $str);
    }
5071
5072
    /**
     * Alias: forwards all arguments to "UTF8::str_split()".
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     *
     * @see UTF8::str_split()
     */
    public static function split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $cleanUtf8);

        return $chunks;
    }
5090
5091
    /**
     * Alias: forwards to "UTF8::str_starts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_starts_with()
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $startsWith = self::str_starts_with($haystack, $needle);

        return $startsWith;
    }
5105
5106
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $tryToKeepStringLength);

        // drop leading dashes / underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // plain "mb_strtoupper" is enough as long as no language / length special-casing is requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        /**
         * Shared upper-casing step of both replacement passes.
         *
         * @param string $text
         * @param bool   $clean
         *
         * @return string
         */
        $toUpper = static function (string $text, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
            if ($useMbFunction !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        // pass 1: remove separators and upper-case the character which follows them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                if (!isset($match[1])) {
                    return '';
                }

                return $toUpper($match[1], false);
            },
            $str
        );

        // pass 2: upper-case each run of digits together with the character which follows it
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $str
        );
    }
5191
5192
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first; the capitalization helper then runs
     * once with a space and once with a dash as the delimiter.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $spaceSeparated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($spaceSeparated, '-');
    }
5210
5211
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        // byte-wise search for exact matching, multibyte search for case-insensitive matching
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5233
5234
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * Every needle is checked; the search stops at the first needle that is
     * missing. An empty haystack, an empty needle list or an empty needle
     * always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool <p>Whether or not $haystack contains every one of the $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare against '' instead of using "!$needle", so that the needle "0" is still searched
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // only a missing needle ends the loop early, a found needle must not
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5269
5270
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * Empty needles are skipped. An empty haystack or an empty needle list
     * always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool <p>Whether or not $haystack contains at least one of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare against '' instead of using "!$needle", so that the needle "0" is not skipped
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5312
5313
    /**
     * Returns a lowercase and trimmed string separated by dashes.
     *
     * This is "UTF8::str_delimit()" with '-' as delimiter: dashes are inserted
     * before uppercase characters (with the exception of the first character
     * of the string), and in place of spaces as well as underscores.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *
     * @see UTF8::str_delimit()
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5327
5328
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * The regex work is done via "mb_ereg_replace()" if mbstring-support was
     * detected, otherwise via "preg_replace()" with the "u" modifier.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $hasMbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // 1.) put a dash in front of every uppercase letter which is not at a word boundary
        if ($hasMbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // 2.) lowercase: the native function is only used for plain UTF-8 without special options
        $useNativeLowercase = $lang === null
                              && $tryToKeepStringLength === false
                              && $encoding === 'UTF-8';
        if ($useNativeLowercase) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // 3.) replace every run of dashes, underscores and whitespace by the delimiter
        if ($hasMbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5379
5380
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order and the first hit wins:
     * binary (UTF-16 / UTF-32 only), ASCII, UTF-8, "mb_detect_encoding()",
     * and finally an "iconv()" round-trip over the known encodings.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        //
        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($input, true) === true) {
            // result "1" is mapped to little-endian, result "2" to big-endian
            $utf16 = self::is_utf16($input, false);
            if ($utf16 === 1 || $utf16 === 2) {
                return $utf16 === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            $utf32 = self::is_utf32($input, false);
            if ($utf32 === 1 || $utf32 === 2) {
                return $utf32 === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (self::is_ascii($input) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8($input) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, \mb_detect_order(), true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);

            // the candidate is accepted if the conversion onto itself does not change the string
            if ($roundTrip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5466
5467
    /**
     * Alias of "UTF8::str_ends_with()": checks if $haystack ends with $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $endsWithNeedle = self::str_ends_with($haystack, $needle);

        return $endsWithNeedle;
    }
5481
5482
    /**
     * Check if the string ends with the given substring (case-sensitive, byte-wise comparison).
     *
     * An empty $needle always matches, even for an empty $haystack.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        // compare the last "strlen($needle)" bytes of the haystack with the needle
        return $haystack !== ''
               && \substr($haystack, -\strlen($needle)) === $needle;
    }
5502
5503
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty list of substrings results in false
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool <p>Whether or not $str ends with one of the $substrings.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list never enters the loop and falls through to "false"
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5527
5528
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string <p>$str, guaranteed to start with $substring.</p>
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $alreadyPrefixed = $substring !== '' && \strpos($str, $substring) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5549
5550
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string <p>$str, guaranteed to end with $substring.</p>
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $needsSuffix = $str === ''
                       || $substring === ''
                       || \substr($str, -\strlen($substring)) !== $substring;

        return $needsSuffix ? $str . $substring : $str;
    }
5572
5573
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * Every occurrence of '_id' is removed before the remaining underscores
     * are replaced; the result is trimmed before the first char is uppercased.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        $replacements = [
            '_id' => '',
            '_'   => ' ',
        ];

        $humanized = \str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );

        return self::ucfirst(\trim($humanized));
    }
5597
5598
    /**
     * Alias of "UTF8::str_istarts_with()": case-insensitive check if $haystack starts with $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *
     * @see UTF8::str_istarts_with()
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $startsWithNeedle = self::str_istarts_with($haystack, $needle);

        return $startsWithNeedle;
    }
5612
5613
    /**
     * Alias of "UTF8::str_iends_with()": case-insensitive check if $haystack ends with $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *
     * @see UTF8::str_iends_with()
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $endsWithNeedle = self::str_iends_with($haystack, $needle);

        return $endsWithNeedle;
    }
5627
5628
    /**
     * Check if the string ends with the given substring, case insensitive.
     *
     * An empty $needle always matches, even for an empty $haystack.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        // the tail is cut by the byte-length of the needle and then compared case-insensitive
        return $haystack !== ''
               && self::strcasecmp(\substr($haystack, -\strlen($needle)), $needle) === 0;
    }
5648
5649
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty list of substrings results in false
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool <p>Whether or not $str ends with one of the $substrings.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // an empty list never enters the loop and falls through to "false"
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5673
5674
    /**
     * Returns the index of the first occurrence of $needle in the string
     * (case-insensitive), and false if not found. Accepts an optional offset
     * from which to begin the search.
     *
     * All arguments are handed over to "UTF8::stripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::stripos()
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::stripos($str, $needle, $offset, $encoding);

        return $index;
    }
5700
5701
    /**
     * Returns the index of the last occurrence of $needle in the string
     * (case-insensitive), and false if not found. Accepts an optional offset
     * from which to begin the search. Offsets may be negative to count from
     * the last character in the string.
     *
     * All arguments are handed over to "UTF8::strripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strripos()
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strripos($str, $needle, $offset, $encoding);

        return $index;
    }
5728
5729
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * All arguments are handed over to "UTF8::strpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strpos()
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strpos($str, $needle, $offset, $encoding);

        return $index;
    }
5755
5756
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * All arguments are handed over to "UTF8::strrpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strrpos()
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strrpos($str, $needle, $offset, $encoding);

        return $index;
    }
5783
5784
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * If $index is greater than the length of $str, then $str is returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // fast path: use the native "mb_" functions directly for UTF-8
        if ($encoding === 'UTF-8') {
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5823
5824
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * The search values are converted into quoted, case-insensitive unicode
     * regex patterns and applied via "preg_replace()". The replacement values
     * are inserted literally, so "$1" or "\1" in a replacement is plain text.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       An empty search value never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value (string) or values (array).
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;

            // '/^(?<=.)$/' can never match: it needs a char in front of the start of the subject
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // "preg_replace()" would otherwise read "$n" and "\n" in the replacement as back-references
        $escapeReplacement = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replace = \is_array($replace)
            ? \array_map($escapeReplacement, $replace)
            : $escapeReplacement($replace);

        return \preg_replace($patterns, $replace, $subject, -1, $count);
    }
5868
5869
    /**
     * Replaces $search from the beginning of string with $replacement
     * (case-insensitive, also for non-ASCII chars).
     *
     * An empty $search appends the $replacement to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        // compare char-wise via "mb_", because the byte-wise "stripos()" does not fold non-ASCII chars
        $searchLength = (int) \mb_strlen($search);
        $prefix = (string) \mb_substr($str, 0, $searchLength);

        if (\mb_stripos($prefix, $search) === 0) {
            return $replacement . \mb_substr($str, $searchLength);
        }

        return $str;
    }
5900
5901
    /**
     * Replaces $search from the ending of string with $replacement
     * (case-insensitive, also for non-ASCII chars).
     *
     * An empty $search appends the $replacement to $str. A $search which is
     * longer than $str never matches.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        $strLength = (int) \mb_strlen($str);
        $searchLength = (int) \mb_strlen($search);

        // a longer needle can not match; this also avoids an out-of-range offset for the search
        if ($searchLength > $strLength) {
            return $str;
        }

        // compare char-wise via "mb_", because the byte-wise "stripos()" does not fold non-ASCII chars
        $tailStart = $strLength - $searchLength;
        $tail = (string) \mb_substr($str, $tailStart);

        if (\mb_stripos($tail, $search) === 0) {
            return \mb_substr($str, 0, $tailStart) . $replacement;
        }

        return $str;
    }
5932
5933
    /**
     * Check if the string starts with the given substring, case insensitive.
     *
     * An empty $needle always matches, even for an empty $haystack.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        return $haystack !== ''
               && self::stripos($haystack, $needle) === 0;
    }
5953
5954
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $str or an empty list of substrings results in false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool <p>Whether or not $str starts with one of the $substrings.</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5982
5983
    /**
     * Gets the substring after the first occurrence of a separator
     * (the separator is searched case-insensitive).
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used for the cut below, so that the offsets fit together
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6017
6018
    /**
     * Gets the substring after the last occurrence of a separator
     * (the separator is searched case-insensitive).
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used for the cut below, so that the offsets fit together
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6052
6053
    /**
     * Gets the substring before the first occurrence of a separator
     * (the separator is searched case-insensitive).
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used for the cut below, so that the offsets fit together
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6079
6080
    /**
     * Gets the substring before the last occurrence of a separator
     * (the separator is searched case-insensitive).
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // for UTF-8 the native "mb_" functions are used directly
        $isUtf8 = $encoding === 'UTF-8';

        $offset = $isUtf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return $isUtf8
            ? (string) \mb_substr($str, 0, $offset)
            : (string) self::substr($str, 0, $offset, $encoding);
    }
6111
6112
    /**
     * Returns the part of the string after (or, with "$beforeNeedle", before) the
     * first occurrence of "$needle", as reported by self::stristr().
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>An empty string if $str or $needle is empty or self::stristr()
     *                returned false.</p>
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6148
6149
    /**
     * Returns the part of the string after (or, with "$beforeNeedle", before) the
     * last occurrence of "$needle", as reported by self::strrichr().
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>An empty string if $str or $needle is empty or self::strrichr()
     *                returned false.</p>
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6180
6181
    /**
     * Returns the trailing $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>An empty string if $str is empty or $n is not positive.</p>
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // other encodings are normalized first and handled by the class' own substr()
            $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalizedEncoding);
        }

        return (string) \mb_substr($str, -$n);
    }
6204
6205
    /**
     * Limit the number of characters in a string.
     *
     * A string that already fits into $length is returned unchanged. Otherwise it is
     * cut so that the kept part plus $strAddOn is $length characters long. If
     * $strAddOn alone is longer than $length, nothing of $str is kept and only
     * $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>An empty string if $str is empty or $length is not positive.</p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never hand a negative length to mb_substr(): it would cut from the end
            // of the string instead of limiting it.
            $keep = \max(0, $length - (int) \mb_strlen($strAddOn));

            return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // measure the add-on in the same encoding as the input string
        $keep = \max(0, $length - (int) self::strlen($strAddOn, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6242
6243
    /**
     * Limits the number of characters in a string without cutting a word in half.
     *
     * If the character at position "$length - 1" is a space, the string is cut right
     * there. Otherwise the string is cut to $length characters and the last
     * (possibly incomplete) space-separated word is dropped. If nothing remains
     * after dropping it, the first "$length - 1" characters are used instead.
     * $strAddOn is appended whenever the string had to be shortened.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>An empty string if $str is empty or $length is not positive.</p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // the cut position is a space: simply drop it
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
            }

            $str = \mb_substr($str, 0, $length);

            // everything except the last (possibly incomplete) word
            $wholeWords = \implode(' ', \array_slice(\explode(' ', $str), 0, -1));

            if ($wholeWords === '') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
            }

            return $wholeWords . $strAddOn;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // the cut position is a space: simply drop it
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
        }

        $str = self::substr($str, 0, $length, $encoding);
        if ($str === false) {
            return '' . $strAddOn;
        }

        // everything except the last (possibly incomplete) word
        $wholeWords = \implode(' ', \array_slice(\explode(' ', $str), 0, -1));

        if ($wholeWords === '') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
        }

        return $wholeWords . $strAddOn;
    }
6307
6308
    /**
     * Returns the longest run of leading characters shared by $str and $otherStr.
     *
     * Both strings are compared character by character from the start; the
     * comparison stops at the first difference.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $limit = (int) \min(\mb_strlen($str), \mb_strlen($otherStr));
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $limit = (int) \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));
        }

        $prefix = '';
        for ($pos = 0; $pos < $limit; ++$pos) {
            if ($isUtf8) {
                $left = \mb_substr($str, $pos, 1);
                $right = \mb_substr($otherStr, $pos, 1);
            } else {
                $left = self::substr($str, $pos, 1, $encoding);
                $right = self::substr($otherStr, $pos, 1, $encoding);
            }

            // stop at the first position that could not be read or that differs
            if ($left === false || $left !== $right) {
                break;
            }

            $prefix .= $left;
        }

        return $prefix;
    }
6366
6367
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first in $str.
     *
     * Uses the dynamic-programming approach described at
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem.
     * Both strings are split into characters once up front, and only the previous
     * row of the table is kept, because each cell depends solely on its upper-left
     * neighbour.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is
     *                none or one of the inputs is empty.</p>
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Extract every character exactly once instead of once per table cell.
        $strChars = [];
        for ($i = 0; $i < $strLength; ++$i) {
            $strChars[] = $isUtf8
                ? \mb_substr($str, $i, 1)
                : self::substr($str, $i, 1, $encoding);
        }

        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[] = $isUtf8
                ? \mb_substr($otherStr, $j, 1)
                : self::substr($otherStr, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // 1-based position in $str at which the best match ends
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            $strChar = $strChars[$i - 1];
            $currentRow = [0];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    $run = $previousRow[$j - 1] + 1;

                    // strictly greater: on ties the earlier match wins
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $currentRow[$j] = $run;
            }

            $previousRow = $currentRow;
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6451
6452
    /**
     * Returns the longest run of trailing characters shared by $str and $otherStr.
     *
     * Both strings are compared character by character from the end; the
     * comparison stops at the first difference.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>An empty string if one of the inputs is empty or nothing matches.</p>
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $limit = (int) \min(\mb_strlen($str, $encoding), \mb_strlen($otherStr, $encoding));
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $limit = (int) \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));
        }

        $suffix = '';
        for ($fromEnd = 1; $fromEnd <= $limit; ++$fromEnd) {
            if ($isUtf8) {
                $left = \mb_substr($str, -$fromEnd, 1);
                $right = \mb_substr($otherStr, -$fromEnd, 1);
            } else {
                $left = self::substr($str, -$fromEnd, 1, $encoding);
                $right = self::substr($otherStr, -$fromEnd, 1, $encoding);
            }

            // stop at the first position that could not be read or that differs
            if ($left === false || $left !== $right) {
                break;
            }

            // walking backwards, so every match is prepended
            $suffix = $left . $suffix;
        }

        return $suffix;
    }
6513
6514
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is given without delimiters; it is wrapped in "/" and evaluated
     * with the "u" (UTF-8) modifier. Forward slashes inside the pattern may be
     * written either escaped ("\/") or unescaped ("/").
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // Escape every "/" that is not already escaped (i.e. that is preceded by an
        // even number of backslashes); otherwise it would terminate the delimiter
        // early and preg_match() would fail with an "Unknown modifier" warning.
        $escapedPattern = \preg_replace('~(?<!\\\\)((?:\\\\\\\\)*)/~', '$1\\/', $pattern);
        if ($escapedPattern === null) {
            // escaping failed: fall back to the pattern exactly as it was given
            $escapedPattern = $pattern;
        }

        return (bool) \preg_match('/' . $escapedPattern . '/u', $str);
    }
6526
6527
    /**
     * Tells whether a character exists at the given index. Negative offsets count
     * from the last character in the string. Implements part of the ArrayAccess
     * interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        // negative offsets reach back at most $charCount characters,
        // non-negative offsets must stay below $charCount
        return $offset < 0
            ? $charCount >= \abs($offset)
            : $charCount > $offset;
    }
6549
6550
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Validate the index with the same $encoding that is used to read the
        // character, so the bounds check and the lookup agree on the string length.
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6579
6580
    /**
     * Pad a string to the given length with another string.
     *
     * The input is returned unchanged if $pad_length is 0, if $pad_string is empty
     * or if the input is already longer than $pad_length.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of the textual aliases
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the textual aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // character count: "mb_" directly for UTF-8, the class' own strlen() otherwise
        $measure = static function (string $value) use ($isUtf8, $encoding): int {
            return $isUtf8
                ? (int) \mb_strlen($value)
                : (int) self::strlen($value, $encoding);
        };

        // first $count characters of $value, using the same split as above
        $take = static function (string $value, int $count) use ($isUtf8, $encoding): string {
            return $isUtf8
                ? (string) \mb_substr($value, 0, $count)
                : (string) self::substr($value, 0, $count, $encoding);
        };

        $str_length = $measure($str);
        if ($pad_length < $str_length) {
            return $str;
        }

        $missing = $pad_length - $str_length;
        $pre = '';
        $post = '';

        if ($pad_type === \STR_PAD_BOTH) {
            // an odd remainder puts the extra character on the right-hand side
            $leftCount = (int) \floor($missing / 2);
            $rightCount = (int) \ceil($missing / 2);

            $pre = $take(\str_repeat($pad_string, $leftCount), $leftCount);
            $post = $take(\str_repeat($pad_string, $rightCount), $rightCount);
        } else {
            // repeat the pad string often enough, then trim to the exact size
            $repetitions = (int) \ceil($missing / $measure($pad_string));
            $filler = $take(\str_repeat($pad_string, $repetitions), $missing);

            if ($pad_type === \STR_PAD_LEFT) {
                $pre = $filler;
            } else {
                // STR_PAD_RIGHT and every unknown integer value
                $post = $filler;
            }
        }

        return $pre . $str . $post;
    }
6742
6743
    /**
     * Pads both sides of the string up to the given length.
     * Shortcut for str_pad() with a pad type of 'both'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // str_pad() maps the alias 'both' to STR_PAD_BOTH
        return self::str_pad($str, $length, $padStr, 'both', $encoding);
    }
6762
6763
    /**
     * Pads the beginning of the string up to the given length.
     * Shortcut for str_pad() with a pad type of 'left'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // str_pad() maps the alias 'left' to STR_PAD_LEFT
        return self::str_pad($str, $length, $padStr, 'left', $encoding);
    }
6782
6783
    /**
     * Pads the end of the string up to the given length.
     * Shortcut for str_pad() with a pad type of 'right'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // str_pad() maps the alias 'right' to STR_PAD_RIGHT
        return self::str_pad($str, $length, $padStr, 'right', $encoding);
    }
6802
6803
    /**
     * Repeats a string.
     *
     * The input is passed through self::filter() before it is handed to the
     * native str_repeat().
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6827
6828
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replaces all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6873
6874
    /**
     * Replaces $search with $replacement if $str starts with $search.
     *
     * An empty $search never matches as a prefix; in that case $replacement is
     * appended to the end of $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '' && $replacement === '') {
            return '';
        }

        if ($search === '') {
            // covers the empty $str case too: '' . $replacement === $replacement
            return $str . $replacement;
        }

        // byte-wise prefix comparison
        $searchLength = \strlen($search);
        if (\strncmp($str, $search, $searchLength) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $searchLength);
    }
6905
6906
    /**
     * Replaces $search with $replacement if $str ends with $search.
     *
     * An empty $search never matches as a suffix; in that case $replacement is
     * appended to the end of $str. A $search that is longer than $str can never
     * match, so $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // Check the length first: searching with an offset of
        // "strlen($str) - strlen($search)" would hand strpos() a negative offset
        // that can lie outside of $str (ValueError on PHP 8, warning on PHP 7).
        if (
            $searchLength <= \strlen($str)
            &&
            \substr($str, -$searchLength) === $search
        ) {
            $str = \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
6937
6938
    /**
6939
     * Replace the first "$search"-term with the "$replace"-term.
6940
     *
6941
     * @param string $search
6942
     * @param string $replace
6943
     * @param string $subject
6944
     *
6945
     * @return string
6946
     *
6947
     * @psalm-suppress InvalidReturnType
6948
     */
6949 2
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        // position of the first hit, or false when self::strpos() finds nothing
        $firstHit = self::strpos($subject, $search);

        if ($firstHit === false) {
            // no occurrence: the subject stays as it is
            return $subject;
        }

        // only the span covered by the search term gets replaced
        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $firstHit, $searchLength);
    }
6967
6968
    /**
6969
     * Replace the last "$search"-term with the "$replace"-term.
6970
     *
6971
     * @param string $search
6972
     * @param string $replace
6973
     * @param string $subject
6974
     *
6975
     * @return string
6976
     *
6977
     * @psalm-suppress InvalidReturnType
6978
     */
6979 2
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        // position of the last hit, or false when self::strrpos() finds nothing
        $lastHit = self::strrpos($subject, $search);

        if ($lastHit === false) {
            // no occurrence: the subject stays as it is
            return $subject;
        }

        // only the span covered by the search term gets replaced
        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $lastHit, $searchLength);
    }
6999
7000
    /**
7001
     * Shuffles all the characters in the string.
7002
     *
7003
     * PS: uses random algorithm which is weak for cryptography purposes
7004
     *
7005
     * @param string $str      <p>The input string</p>
7006
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7007
     *
7008
     * @return string the shuffled string
7009
     */
7010 5
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Nothing to shuffle. Returning early also keeps range() from being
        // called as range(0, -1), which produces [0, -1] rather than an empty list.
        if ($str === '') {
            return '';
        }

        // Decide once which set of string functions is used: the plain "mb_"
        // functions for UTF-8, the encoding-aware class helpers otherwise.
        $useMbDirectly = $encoding === 'UTF-8';

        if ($useMbDirectly) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        // the length could not be determined (the cast turned a failure into 0)
        if ($length < 1) {
            return '';
        }

        // shuffle the character positions, then collect the characters in that order
        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        $shuffledStr = '';
        foreach ($indexes as $index) {
            $char = $useMbDirectly
                ? \mb_substr($str, $index, 1)
                : self::substr($str, $index, 1, $encoding);

            // a failed look-up contributes nothing
            if ($char !== false) {
                $shuffledStr .= $char;
            }
        }

        return $shuffledStr;
    }
7046
7047
    /**
7048
     * Returns the substring beginning at $start, and up to, but not including
7049
     * the index specified by $end. If $end is omitted, the function extracts
7050
     * the remaining string. If $end is negative, it is computed from the end
7051
     * of the string.
7052
     *
7053
     * @param string $str
7054
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
7055
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
7056
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7057
     *
7058
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.</p>
7061
     */
7062 18
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // UTF-8 is served by the plain "mb_" functions, every other encoding by
        // the encoding-aware class helpers.
        $useMbDirectly = $encoding === 'UTF-8';
        if (!$useMbDirectly) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end === null) {
            // No end index: extract up to the end of the string. The string length
            // is used as the maximum number of characters to take.
            $length = $useMbDirectly
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);
        } else {
            if ($start < 0 || $end < 0) {
                // Negative indexes count from the end of the string. Turn them into
                // absolute positions before the length is calculated, otherwise
                // "$end - $start" mixes two different reference points.
                $strLength = $useMbDirectly
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);

                if ($start < 0) {
                    // a start before the first character is clamped to position 0
                    $start = \max(0, $strLength + $start);
                }

                if ($end < 0) {
                    $end = $strLength + $end;
                }
            }

            // An end at or before the start describes an empty range. This must not
            // reach the substring functions as a zero or negative length, because
            // a negative length there means "omit that many characters from the end".
            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        }

        if ($useMbDirectly) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7096
7097
    /**
7098
     * Convert a string to e.g.: "snake_case"
7099
     *
7100
     * @param string $str
7101
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7102
     *
7103
     * @return string string in snake_case
7104
     */
7105 22
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // dashes are handled like underscores from here on
        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Put a "_" in front of every number character and every upper-case letter.
        // Inside a character class "|" is a literal pipe, not an alternation, so the
        // class lists the two properties directly and leaves "|" characters alone.
        $str = (string) \preg_replace_callback(
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $matchInt = (int) $match;

                // a digit that survives the int round trip is wrapped in "_" on both sides
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                // everything else is lower-cased and prefixed with "_"
                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7161
7162
    /**
7163
     * Sort all characters according to code points.
7164
     *
7165
     * @param string $str    <p>A UTF-8 string.</p>
7166
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
7167
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
7168
     *
7169
     * @return string string of sorted characters
7170
     */
7171 2
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        // work on the values returned by self::codepoints() instead of on the text
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice leaves exactly one entry per distinct value
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if (!$desc) {
            // ascending order
            \asort($codePoints);

            return self::string($codePoints);
        }

        // descending order
        \arsort($codePoints);

        return self::string($codePoints);
    }
7187
7188
    /**
7189
     * Convert a string to an array of Unicode characters.
7190
     *
7191
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
7192
     * @param int                       $length             [optional] <p>Max character length of each array
7193
     *                                                      element.</p>
7194
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
7195
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
7196
     *                                                      "mb_substr"</p>
7197
     *
7198
     * @return array
7199
     *               <p>An array containing chunks of the input.</p>
7200
     */
7201 89
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        // a chunk size below 1 cannot produce any chunk
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // split every element on its own; the keys of the input array are kept
            foreach ($str as &$v) {
                $v = self::str_split(
                    $v,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }
            // release the reference, so that $v no longer aliases the last element
            unset($v);

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // Step 1: split the input into single characters, using the first
        // strategy that is available.
        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short input: fetch the characters one by one
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer input: collect all characters with one regex pass
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {
            // fallback: walk over the bytes and group them by the bit pattern of
            // the lead byte; a lead byte whose following bytes do not fit the
            // expected continuation pattern adds nothing to the result

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a 2-byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a 3-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a 4-byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        // Step 2: glue the single characters together into chunks of $length.
        if ($length > 1) {
            return \array_map(
                // The parameter is taken by value: array_map() hands over values, so
                // a by-reference parameter gains nothing and draws a warning on PHP 8.
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7327
7328
    /**
7329
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
7332
     *
7333
     * @param string $str
7334
     * @param string $pattern <p>The regex with which to split the string.</p>
7335
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
7336
     *
7337
     * @return string[] an array of strings
7338
     */
7339 16
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        // a limit of 0 asks for no results at all
        if ($limit === 0) {
            return [];
        }

        // without a pattern there is nothing to split at
        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() signals a failure with false; keep that away from the
            // array return type and answer like the fallback below does
            if ($parts === false) {
                return [];
            }

            // A positive limit truncates the result list (the remainder is dropped,
            // not appended to the last element); a negative limit means "no limit".
            if ($limit > 0) {
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        // NOTE(review): this fallback escapes $pattern with preg_quote() and thus
        // splits at the literal text, while the mbstring path above treats
        // $pattern as a regular expression - confirm which of the two is intended.

        // Ask for one part more than wanted: preg_split() puts the unsplit rest
        // into the last part, which is removed again further down.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7389
7390
    /**
7391
     * Check if the string starts with the given substring.
7392
     *
7393
     * @param string $haystack <p>The string to search in.</p>
7394
     * @param string $needle   <p>The substring to search for.</p>
7395
     *
7396
     * @return bool
7397
     */
7398 19
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // every string, the empty one included, starts with the empty string
        if ($needle === '') {
            return true;
        }

        // Compare only the first strlen($needle) bytes instead of searching the
        // whole haystack for an occurrence. An empty or too short haystack simply
        // compares as different, so it needs no separate check.
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7410
7411
    /**
7412
     * Returns true if the string begins with any of $substrings, false otherwise.
7413
     *
7414
     * - case-sensitive
7415
     *
7416
     * @param string $str        <p>The input string.</p>
7417
     * @param array  $substrings <p>Substrings to look for.</p>
7418
     *
7419
     * @return bool whether or not $str starts with $substring
7420
     */
7421 8
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        // an empty string or an empty candidate list can never produce a match
        if ($str === '' || $substrings === []) {
            return false;
        }

        // the first matching candidate settles the answer
        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
7439
7440
    /**
7441
     * Gets the substring after the first occurrence of a separator.
7442
     *
7443
     * @param string $str       <p>The input string.</p>
7444
     * @param string $separator <p>The string separator.</p>
7445
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7446
     *
7447
     * @return string
7448
     */
7449 1
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // without input or without a separator there is no "after"
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                // separator not found
                return '';
            }

            // continue right behind the separator
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            // separator not found
            return '';
        }

        // Cut through self::substr(), like the other "*_separator" methods do, so
        // that position, length and cut are all handled by the same class helpers
        // for the given encoding.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7479
7480
    /**
7481
     * Gets the substring after the last occurrence of a separator.
7482
     *
7483
     * @param string $str       <p>The input string.</p>
7484
     * @param string $separator <p>The string separator.</p>
7485
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7486
     *
7487
     * @return string
7488
     */
7489 1
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // without input or without a separator there is no "after"
        if ($separator === '' || $str === '') {
            return '';
        }

        $useMbDirectly = $encoding === 'UTF-8';

        // position of the last separator
        $lastPos = $useMbDirectly
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($lastPos === false) {
            // separator not found
            return '';
        }

        // continue right behind the separator
        if ($useMbDirectly) {
            return (string) \mb_substr($str, $lastPos + (int) \mb_strlen($separator));
        }

        return (string) self::substr(
            $str,
            $lastPos + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7519
7520
    /**
7521
     * Gets the substring before the first occurrence of a separator.
7522
     *
7523
     * @param string $str       <p>The input string.</p>
7524
     * @param string $separator <p>The string separator.</p>
7525
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7526
     *
7527
     * @return string
7528
     */
7529 1
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // without input or without a separator there is no "before"
        if ($separator === '' || $str === '') {
            return '';
        }

        $useMbDirectly = $encoding === 'UTF-8';

        // position of the first separator
        $firstPos = $useMbDirectly
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($firstPos === false) {
            // separator not found
            return '';
        }

        // everything in front of the separator
        if ($useMbDirectly) {
            return (string) \mb_substr($str, 0, $firstPos);
        }

        return (string) self::substr($str, 0, $firstPos, $encoding);
    }
7563
7564
    /**
7565
     * Gets the substring before the last occurrence of a separator.
7566
     *
7567
     * @param string $str       <p>The input string.</p>
7568
     * @param string $separator <p>The string separator.</p>
7569
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7570
     *
7571
     * @return string
7572
     */
7573 1
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // without input or without a separator there is no "before"
        if ($separator === '' || $str === '') {
            return '';
        }

        $useMbDirectly = $encoding === 'UTF-8';

        // position of the last separator
        $lastPos = $useMbDirectly
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($lastPos === false) {
            // separator not found
            return '';
        }

        // everything in front of the separator
        if ($useMbDirectly) {
            return (string) \mb_substr($str, 0, $lastPos);
        }

        // the encoding name is normalized before the cut on this path
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $lastPos, $encoding);
    }
7606
7607
    /**
7608
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
7609
     *
7610
     * @param string $str          <p>The input string.</p>
7611
     * @param string $needle       <p>The string to look for.</p>
7612
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
7613
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
7614
     *
7615
     * @return string
7616
     */
7617 2
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in, or nothing to search for
        if ($str === '' || $needle === '') {
            return '';
        }

        // With $beforeNeedle === false, mb_strstr() returns the part of the string
        // that starts at the first occurrence of the needle (needle included);
        // with true, it returns the part in front of that occurrence.
        $part = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        // false means "needle not found"
        if ($part === false) {
            return '';
        }

        return $part;
    }
7651
7652
    /**
7653
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
7654
     *
7655
     * @param string $str          <p>The input string.</p>
7656
     * @param string $needle       <p>The string to look for.</p>
7657
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
7658
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
7659
     *
7660
     * @return string
7661
     */
7662 2
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in, or nothing to search for
        if ($str === '' || $needle === '') {
            return '';
        }

        // With $beforeNeedle === false, mb_strrchr() returns the part of the string
        // that starts at the last occurrence of the needle (needle included);
        // with true, it returns the part in front of that occurrence.
        $part = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        // false means "needle not found"
        if ($part === false) {
            return '';
        }

        return $part;
    }
7696
7697
    /**
7698
     * Surrounds $str with the given substring.
7699
     *
7700
     * @param string $str
7701
     * @param string $substring <p>The substring to add to both sides.</p>
7702
     *
7703
     * @return string string with the substring both prepended and appended
7704
     */
7705 5
    public static function str_surround(string $str, string $substring): string
    {
        // the same wrapper goes in front of and behind the string
        return \implode('', [$substring, $str, $substring]);
    }
7709
7710
    /**
7711
     * Returns a trimmed string with the first letter of each word capitalized.
7712
     * Also accepts an array, $ignore, allowing you to list words not to be
7713
     * capitalized.
7714
     *
7715
     * @param string              $str
7716
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
7717
     *                                                   Default: null</p>
7718
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
7719
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
7720
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
7721
     *                                                   tr</p>
7722
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
7723
     *                                                   ß</p>
7724
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
7725
     *
7726
     * @return string the titleized string
7727
     */
7728 6
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_" functions are only used when neither a language nor
        // the keep-the-length option was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        // turns one whitespace-free run of characters into its title-cased form
        $titleizeWord = static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $useMbFunction, $encoding): string {
            $word = $match[0];

            // words on the ignore list stay exactly as they were written
            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if ($useMbFunction !== true) {
                // lower-case the whole word first, then upper-case its first character
                $lowered = self::strtolower(
                    $word,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );

                return self::ucfirst(
                    $lowered,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            }

            // first character upper-cased, remainder lower-cased
            if ($encoding === 'UTF-8') {
                return \mb_strtoupper(\mb_substr($word, 0, 1))
                       . \mb_strtolower(\mb_substr($word, 1));
            }

            return \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding)
                   . \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);
        };

        return (string) \preg_replace_callback('/([^\\s]+)/u', $titleizeWord, $str);
    }
7785
7786
    /**
     * Returns a trimmed string in "editorial" title case: ordinary words get an
     * upper-case first letter, while "small" words (a, an, and, of, ...) stay
     * lower-case unless they open or close the title or an inserted sub-phrase.
     *
     * URLs, domains, e-mail addresses, file paths and words that already carry
     * internal capitals (e.g. "iPhone") are passed through unchanged. The
     * entries of $ignore are appended to the built-in small-word list and are
     * placed into the regular expressions as-is.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string   $str      <p>The input string.</p>
     * @param string[] $ignore   <p>An array of words not to capitalize.</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        $builtInSmallWords = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        $smallWordsRx = \implode('|', \array_merge($builtInSmallWords, $ignore));
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Upper-cases the first letter of capture group 1.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $capitalizeFirstGroup = static function (array $matches) use ($encoding): string {
            return static::str_upper_first($matches[1], $encoding);
        };

        /**
         * Keeps capture group 1 and upper-cases the first letter of group 2.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $capitalizeSecondGroup = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::str_upper_first($matches[2], $encoding);
        };

        $title = \trim($str);

        // an input without any lower-case character is lower-cased before titleizing
        if (self::has_lowercase($title) === false) {
            $title = self::strtolower($title, $encoding);
        }

        // the main substitutions
        $title = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // URLs, domains, emails and file paths stay as they are
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // small words are lower-cased
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // words without internal caps get an upper-case first letter
                    $word = static::str_upper_first($matches[4], $encoding);
                } else {
                    // any other kind of word (iPhone) is preserved
                    $word = $matches[5];
                }

                // leading (1) and trailing (6) underscores are kept around the word
                return $matches[1] . $word . $matches[6];
            },
            $title
        );

        // Exceptions for small words: capitalize at start of title...
        $title = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \\b # ...followed by small word
                     ~uxi',
            $capitalizeSecondGroup,
            $title
        );

        // ...and end of title
        $title = (string) \preg_replace_callback(
            '~\\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $capitalizeFirstGroup,
            $title
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $title = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $capitalizeFirstGroup,
            $title
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind in this pattern tests for an ellipsis character
        // although its inline comment talks about a hyphen - confirm the intent.
        $title = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $capitalizeSecondGroup,
            $title
        );

        return $title;
    }
7955
7956
    /**
     * Get a binary representation of a specific string.
     *
     * Every byte of the input is rendered as eight binary digits; leading zero
     * digits of the complete result are stripped, so an empty (or all-zero)
     * input yields "0". Example: "a" => "1100001".
     *
     * The conversion is done byte by byte, so it stays exact for inputs of any
     * length (a single base_convert() call loses precision on large numbers).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        // "C*" yields one unsigned byte value per input byte
        $bytes = \unpack('C*', $str);
        if ($bytes === false) {
            return false;
        }

        $bits = '';
        foreach ($bytes as $byte) {
            $bits .= \sprintf('%08b', $byte);
        }

        // mirror the historic output format: no leading zeros, but never an empty result
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7974
7975
    /**
     * Split a string into its lines.
     *
     * A run of one or two "\r" / "\n" characters is treated as a single line
     * separator. The result can optionally be filtered via
     * self::reduce_string_array().
     *
     * @param string   $str               <p>The input string.</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // returned when there is nothing to split or when splitting fails
        $nothingToSplit = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $nothingToSplit;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothingToSplit;
        }

        $filterRequested = $removeEmptyValues !== false || $removeShortValues !== null;
        if (!$filterRequested) {
            return $lines;
        }

        return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
    }
8013
8014
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result
     * alternates between the text between words (even indexes) and the words
     * themselves (odd indexes) as long as no filtering is requested.
     *
     * @param string   $str               <p>The input string.</p>
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        // character class describing what counts as part of a word
        $wordChars = self::rxClass($charList, '\pL');

        $parts = \preg_split("/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        if ($removeEmptyValues === false && $removeShortValues === null) {
            return $parts;
        }

        $filtered = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

        // make sure every remaining entry is a string
        foreach ($filtered as $key => $value) {
            $filtered[$key] = (string) $value;
        }

        return $filtered;
    }
8061
8062
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are forwarded unchanged to UTF8::to_ascii().
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8077
8078
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If nothing of the input fits next to $substring (the remaining length is
     * zero or negative), only $substring is returned - the same convention
     * UTF8::str_truncate_safe() uses. A string that is not longer than $length
     * is returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve room for the appended substring
            $length -= (int) \mb_strlen($substring);

            // a negative length would make mb_substr() cut from the end of the
            // string and return more than requested, so stop here instead
            if ($length <= 0) {
                return $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve room for the appended substring
        $length -= (int) self::strlen($substring, $encoding);

        // same guard as in the UTF-8 fast path above
        if ($length <= 0) {
            return $substring;
        }

        return ((string) self::substr($str, 0, $length, $encoding)) . $substring;
    }
8135
8136
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * For an empty input, a non-positive $length, or when $substring leaves no
     * room for any part of the input, only $substring is returned.
     *
     * @param string $str                             <p>The input string.</p>
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        // the exact string "UTF-8" takes the direct "mb_*" path, everything else
        // goes through the encoding-aware wrappers of this class
        $useMb = $encoding === 'UTF-8';
        if ($useMb === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $fullLength = $useMb
            ? \mb_strlen($str)
            : self::strlen($str, $encoding);
        if ($length >= (int) $fullLength) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= (int) ($useMb ? \mb_strlen($substring) : self::strlen($substring, $encoding));
        if ($length <= 0) {
            return $substring;
        }

        $truncated = $useMb
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // first space at or after the last kept character
        $nextSpacePos = $useMb
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);

        // the cut falls exactly on a word boundary: nothing was split
        if ($nextSpacePos === $length) {
            return $truncated . $substring;
        }

        // the last word was truncated: find the last space inside the kept part
        $lastSpacePos = $useMb
            ? \mb_strrpos($truncated, ' ', 0)
            : self::strrpos($truncated, ' ', 0, $encoding);

        $cutBackToLastSpace = $lastSpacePos !== false
                              ||
                              ($nextSpacePos !== false && $ignoreDoNotSplitWordsForOneWord === false);

        if ($cutBackToLastSpace) {
            $truncated = (string) (
                $useMb
                    ? \mb_substr($truncated, 0, (int) $lastSpacePos)
                    : self::substr($truncated, 0, (int) $lastSpacePos, $encoding)
            );
        }

        return $truncated . $substring;
    }
8230
8231
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * Delegates to UTF8::str_delimit() with "_" as delimiter.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8245
8246
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is first passed through UTF8::str_camelize() and the result is
     * then handed to UTF8::ucfirst().
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
8268
8269
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are forwarded unchanged to UTF8::ucfirst().
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        return self::ucfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
8291
8292
    /**
     * Counts number of words in the UTF-8 string.
     *
     * The string is split via UTF8::str_to_words(); in that result the words
     * sit at the odd indexes and the text between them at the even indexes.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // every format other than 1 and 2 only reports the number of words
        if ($format !== 1 && $format !== 2) {
            return (int) (($partCount - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($index = 1; $index < $partCount; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // format 2: key every word by its character offset in the input
        $offset = (int) self::strlen($parts[0]);
        for ($index = 1; $index < $partCount; $index += 2) {
            $words[$offset] = $parts[$index];
            $offset += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
        }

        return $words;
    }
8329
8330
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp()
     *
     * Both strings are passed through UTF8::strtocasefold() and the folded
     * results are compared with UTF8::strcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8351
8352
    /**
     * alias for "UTF8::strstr()"
     *
     * All arguments are forwarded unchanged to UTF8::strstr().
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     *
     * @see UTF8::strstr()
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        return self::strstr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );
    }
8374
8375
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are normalized to Unicode NFD before they are compared byte
     * by byte, so canonically equivalent spellings compare as equal. A string
     * that cannot be normalized (e.g. invalid UTF-8) takes part in the
     * comparison in its raw form.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical byte sequences never need normalization
        if ($str1 === $str2) {
            return 0;
        }

        // Normalizer::normalize() reports failure with false; passing that on to
        // \strcmp() would either raise a TypeError or make different invalid
        // strings look equal, so fall back to the untouched input instead
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        if ($normalized1 === false) {
            $normalized1 = $str1;
        }

        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);
        if ($normalized2 === false) {
            $normalized2 = $str2;
        }

        return \strcmp($normalized1, $normalized2);
    }
8397
8398
    /**
     * Find length of initial segment not matching mask.
     *
     * Returns the number of characters at the start of the (optionally sliced)
     * string that come before the first character listed in $charList. If no
     * such character occurs, or $charList is empty, the full length is returned.
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $charList <p>The characters to stop at.</p>
     * @param int|null $offset   [optional] <p>Start of the slice of $str that is inspected.</p>
     * @param int|null $length   [optional] <p>Length of the slice of $str that is inspected.</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // nothing can stop the scan without a mask
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        $sliceRequested = $offset !== null || $length !== null;
        if ($sliceRequested) {
            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length !== null) {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset, $length);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first masked character
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $found)) {
            return (int) self::strlen($str, $encoding);
        }

        $segmentLength = self::strlen($found[1], $encoding);

        return $segmentLength === false ? 0 : $segmentLength;
    }
8461
8462
    /**
     * alias for "UTF8::stristr()"
     *
     * All arguments are forwarded unchanged to UTF8::stristr().
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     *
     * @see UTF8::stristr()
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        return self::stristr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );
    }
8484
8485
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every array entry is converted with UTF8::chr() and the resulting
     * characters are joined in their original order.
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $toChar = [
            self::class,
            'chr',
        ];

        $chars = \array_map($toChar, $array);

        return \implode('', $chars);
    }
8507
8508
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The keys of self::$BOM are taken as the known BOM byte sequences; the
     * string has a BOM if it begins with any of them.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // only the keys are needed, so iterate them by value instead of taking
        // a reference to every entry of the shared static table
        foreach (\array_keys(self::$BOM) as $bomString) {
            if (\strpos($str, $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8528
8529
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        // the native function is only given a tag list when one was supplied
        if ($allowable_tags !== null) {
            return \strip_tags($input, $allowable_tags);
        }

        return \strip_tags($input);
    }
8565
8566
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string without any whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str !== '') {
            $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

            return (string) $stripped;
        }

        return '';
    }
8583
8584
    /**
     * Find the position of the first occurrence of a string within another, case insensitive.
     *
     * Tries, in order: mbstring, intl ("grapheme_stripos()", UTF-8 and non-negative
     * offset only), the native byte function for pure ASCII input, and finally a
     * case-fold + UTF8::strpos() fallback.
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found, or if haystack / needle
     *                   is empty (also when it only becomes empty through $cleanUtf8)
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);

            // Cleaning can leave an empty string behind; apply the same
            // "empty input => false" rule as above instead of passing an empty
            // haystack / needle to the search functions below.
            if ($haystack === '' || $needle === '') {
                return false;
            }
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stripos($haystack, $needle, $offset);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_stripos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stripos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Case-fold both sides, then do a case sensitive search.
        $haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($haystack, $needle, $offset, $encoding);
    }
8659
8660
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case insensitive).
     *
     * Tries, in order: mbstring, intl ("grapheme_stristr()", UTF-8 only), the native
     * byte function for pure ASCII input, and finally a regex based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Only a really empty needle (possible after cleaning) short-circuits here.
        // A loose "!$needle" check would also catch the valid needle "0".
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        // Regex fallback: lazily capture everything in front of the first
        // (case insensitive) occurrence of the needle.
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // Skip as many characters as the captured prefix is long.
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8743
8744
    /**
     * Count the characters of a string (not its bytes).
     *
     * The first available strategy wins: mbstring, plain byte count for
     * CP850 / ASCII, iconv, intl ("grapheme_strlen()", UTF-8 only), plain byte
     * count for pure ASCII input and finally a regex based character count.
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        // The two most common encoding names skip the normalisation step.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        // 2) binary / ascii: every byte is one character
        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        // Warn when a non UTF-8 encoding was requested but nothing can handle it.
        if (
            $encoding !== 'UTF-8'
            && self::$SUPPORT['mbstring'] === false
            && self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) iconv
        if (self::$SUPPORT['iconv'] === true) {
            $length = \iconv_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        // 4) intl ("grapheme_strlen()" can't handle other encodings)
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $length = \grapheme_strlen($str);
            if ($length !== null) {
                return $length;
            }
        }

        // 5) ascii only input
        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        // 6) vanilla php: count the characters matched by a UTF-8 aware regex
        \preg_match_all('/./us', $str, $chars);
        $length = \count($chars[0]);

        // No match for a non-empty string is reported as false, not as 0.
        return $length === 0 ? false : $length;
    }
8858
8859
    /**
     * Get the length of a string in bytes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int the number of bytes
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an
        // 8-BIT encoding, otherwise use the native byte function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
8879
8880
    /**
     * Compare two strings case insensitively using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * Both strings are case-folded and then handed to UTF8::strnatcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8901
8902
    /**
     * Compare two strings using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            // Fold both sides first, then let the native function do the comparison.
            $natFolded1 = (string) self::strtonatfold($str1);
            $natFolded2 = (string) self::strtonatfold($str2);

            return \strnatcmp($natFolded1, $natFolded2);
        }

        // Identical strings need no folding at all.
        return 0;
    }
8928
8929
    /**
     * Compare the first n characters of two strings, case insensitive.
     *
     * Both strings are case-folded and then handed to UTF8::strncmp().
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        // NOTE: as in the call being wrapped, $encoding is not forwarded to strncmp().
        return self::strncmp($folded1, $folded2, $len);
    }
8956
8957
    /**
     * Compare the first n characters of two strings.
     *
     * Both strings are cut down to their first $len characters and the
     * results are handed to UTF8::strcmp().
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // The two most common encoding names skip the normalisation step.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            // UTF-8 goes straight to "mb_substr()".
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($head1, $head2);
    }
8992
8993
    /**
     * Search a string for any character of a given set.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // The pattern body is built from the character list by self::rxClass().
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $m)) {
            return false;
        }

        // Cut the haystack at the byte position of the first matched character.
        $bytePos = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $bytePos);
    }
9015
9016
    /**
     * Find position of first occurrence of string in a string.
     *
     * Tries, in order: mbstring, the native byte function for CP850 / ASCII,
     * intl ("grapheme_strpos()", UTF-8 and non-negative offset only), iconv
     * (non-negative offset only), the native byte function for pure ASCII input
     * and finally a vanilla php fallback.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.
     *                              A negative offset counts from the end of the string.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);

            // Cleaning can leave an empty string behind; apply the same
            // "empty input => false" rule as above.
            if ($haystack === '' || $needle === '') {
                return false;
            }
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // Turn an offset counted from the end into an absolute character
            // offset, so the returned position refers to the whole haystack
            // and not only to the tail that is searched below.
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // Byte position inside the remaining part of the haystack ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... converted into a character position and shifted by the skipped characters.
        if ($pos) {
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset + 0;
    }
9163
9164
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to search for in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an
        // 8-BIT encoding, otherwise use the native byte function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
9193
9194
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * With mbstring the call is forwarded to "mb_strrchr()". Without it, CP850 /
     * ASCII input (and $before_needle === false) uses the native byte function;
     * everything else looks up the last position of the first character of
     * $needle (via iconv if available, otherwise via UTF8::strrpos()) and cuts
     * the haystack there.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // The two most common encoding names skip the normalisation step.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) binary / ascii (the native function has no "before needle" mode)
        $isSingleByte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($isSingleByte && $before_needle === false) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) iconv / vanilla php: both only search for the first character of the needle
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        if (self::$SUPPORT['iconv'] === true) {
            $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $lastPos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9316
9317
    /**
     * Reverse the order of the characters in a string.
     *
     * The string is passed through UTF8::emoji_encode() before and
     * UTF8::emoji_decode() after the reversal. For UTF-8 the reversal is done
     * with the "grapheme_" functions when intl is available, otherwise with
     * the "mb_" functions.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // Walk from the last character to the first one.
            for ($idx = (int) self::strlen($str, $encoding) - 1; $idx >= 0; --$idx) {
                $char = self::substr($str, $idx, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($idx = (int) \grapheme_strlen($str) - 1; $idx >= 0; --$idx) {
                $char = \grapheme_substr($str, $idx, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($idx = (int) \mb_strlen($str) - 1; $idx >= 0; --$idx) {
                $char = \mb_substr($str, $idx, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9369
9370
    /**
     * Find the last occurrence of a character in a string within another, case insensitive.
     *
     * With mbstring the call is forwarded to "mb_strrichr()". Without it, the
     * last position of the first character of $needle is looked up via
     * UTF8::strripos() and the haystack is cut there.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // The two most common encoding names skip the normalisation step.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) vanilla php: only the first character of the needle is searched for
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9445
9446
    /**
9447
     * Find position of last occurrence of a case-insensitive string.
9448
     *
9449
     * @param string     $haystack  <p>The string to look in.</p>
9450
     * @param int|string $needle    <p>The string to look for.</p>
9451
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
9452
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9453
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9454
     *
9455
     * @return false|int
9456
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
9457
     *                   string.<br>If needle is not found, it returns false.
9458
     */
9459 3
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not accept an integer $needle,
        // so a non-negative integer is converted via self::chr() first
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strripos()" is only used for UTF-8 and a non-negative offset
        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both sides, then delegate to the case-sensitive search
        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $cleanUtf8
        );
    }
9556
9557
    /**
9558
     * Finds position of last occurrence of a string within another, case insensitive.
9559
     *
9560
     * @param string $haystack <p>
9561
     *                         The string from which to get the position of the last occurrence
9562
     *                         of needle.
9563
     *                         </p>
9564
     * @param string $needle   <p>
9565
     *                         The string to find in haystack.
9566
     *                         </p>
9567
     * @param int    $offset   [optional] <p>
9568
     *                         The position in haystack
9569
     *                         to start searching.
9570
     *                         </p>
9571
     *
9572
     * @return false|int return the numeric position of the last occurrence of needle in the
9573
     *                   haystack string, or false if needle is not found
9574
     */
9575
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // no function overloading active: call the native function directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset
        return \mb_strripos($haystack, $needle, $offset, 'CP850');
    }
9588
9589
    /**
9590
     * Find position of last occurrence of a string in a string.
9591
     *
9592
     * @see http://php.net/manual/en/function.mb-strrpos.php
9593
     *
9594
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
9595
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
9596
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
9597
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
9598
     *                              the end of the string.
9599
     *                              </p>
9600
     * @param string     $encoding  [optional] <p>Set the charset.</p>
9601
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9602
     *
9603
     * @return false|int
9604
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
9605
     *                   string.<br>If needle is not found, it returns false.
9606
     */
9607 35
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not accept an integer $needle,
        // so a non-negative integer is converted via self::chr() first
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strrpos()" is only used for UTF-8 and a non-negative offset
        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $graphemePos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // apply the offset by cutting the haystack:
        // - positive offset: drop the leading characters (added back to the result below)
        // - negative offset: drop the trailing characters, positions stay zero-based
        if ($offset !== 0) {
            if ($offset > 0) {
                $slice = self::substr($haystack, $offset);
            } else {
                $slice = self::substr($haystack, 0, $offset);
                $offset = 0;
            }

            $haystack = $slice === false ? '' : (string) $slice;
        }

        // byte position of the last match ...
        $bytePos = \strrpos($haystack, $needle);
        if ($bytePos === false) {
            return false;
        }

        // ... converted into a character position by measuring the prefix
        $prefix = \substr($haystack, 0, $bytePos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
9726
9727
    /**
9728
     * Find position of last occurrence of a string in a string.
9729
     *
9730
     * @param string $haystack <p>
9731
     *                         The string being checked, for the last occurrence
9732
     *                         of needle.
9733
     *                         </p>
9734
     * @param string $needle   <p>
9735
     *                         The string to find in haystack.
9736
     *                         </p>
9737
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
9738
     *                         the string. Negative values will stop searching at an arbitrary point
9739
     *                         prior to the end of the string.
9740
     *
9741
     * @return false|int The numeric position of the last occurrence of needle in the
9742
     *                   haystack string. If needle is not found, it returns false.
9743
     */
9744
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // no function overloading active: call the native function directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset
        return \mb_strrpos($haystack, $needle, $offset, 'CP850');
    }
9757
9758
    /**
9759
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
9760
     * mask.
9761
     *
9762
     * @param string $str      <p>The input string.</p>
9763
     * @param string $mask     <p>The mask of chars</p>
9764
     * @param int    $offset   [optional]
9765
     * @param int    $length   [optional]
9766
     * @param string $encoding [optional] <p>Set the charset.</p>
9767
     *
9768
     * @return false|int
9769
     */
9770 10
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // narrow the subject down to the requested window before matching
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the leading run of mask characters and measure it
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9801
9802
    /**
9803
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
9804
     *
9805
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
9806
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
9807
     * @param bool   $before_needle [optional] <p>
9808
     *                              If <b>TRUE</b>, strstr() returns the part of the
9809
     *                              haystack before the first occurrence of the needle (excluding the needle).
9810
     *                              </p>
9811
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
9812
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
9813
     *
9814
     * @return false|string
9815
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
9816
     */
9817 3
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" is only used for UTF-8
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($graphemeResult !== false) {
                return $graphemeResult;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // lazily capture everything in front of the first occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $prefix = $match[1];

        if ($before_needle) {
            return $prefix;
        }

        // skip the prefix and return the rest (starting with the needle)
        return self::substr($haystack, (int) self::strlen($prefix));
    }
9910
9911
    /**
9912
     * Finds first occurrence of a string within another.
9913
     *
9914
     * @param string $haystack      <p>
9915
     *                              The string from which to get the first occurrence
9916
     *                              of needle.
9917
     *                              </p>
9918
     * @param string $needle        <p>
9919
     *                              The string to find in haystack.
9920
     *                              </p>
9921
     * @param bool   $before_needle [optional] <p>
9922
     *                              Determines which portion of haystack
9923
     *                              this function returns.
9924
     *                              If set to true, it returns all of haystack
9925
     *                              from the beginning to the first occurrence of needle.
9926
     *                              If set to false, it returns all of haystack
9927
     *                              from the first occurrence of needle to the end,
9928
     *                              </p>
9929
     *
9930
     * @return false|string the portion of haystack,
9931
     *                      or false if needle is not found
9932
     */
9933
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // no function overloading active: call the native function directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
    }
9946
9947
    /**
9948
     * Unicode transformation for case-less matching.
9949
     *
9950
     * @see http://unicode.org/reports/tr21/tr21-5.html
9951
     *
9952
     * @param string      $str       <p>The input string.</p>
9953
     * @param bool        $full      [optional] <p>
9954
     *                               <b>true</b>, replace full case folding chars (default)<br>
9955
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
9956
     *                               </p>
9957
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9958
     * @param string      $encoding  [optional] <p>Set the charset.</p>
9959
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
9960
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
9961
     *                               is for some languages better ...</p>
9962
     *
9963
     * @return string
9964
     */
9965 32
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove invalid UTF-8 sequences before the case conversion
            $str = self::clean($str);
        }

        // pre-process the string via the shared case helper
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // only a strict boolean "true" selects the lowercase variant
        $toLower = ($lower === true);

        // fast path: no language specific rules and plain UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return $toLower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        return $toLower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
9999
10000
    /**
10001
     * Make a string lowercase.
10002
     *
10003
     * @see http://php.net/manual/en/function.mb-strtolower.php
10004
     *
10005
     * @param string      $str                   <p>The string being lowercased.</p>
10006
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
10007
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
10008
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
10009
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
10010
     *
10011
     * @return string
10012
     *                <p>String with all alphabetic characters converted to lowercase.</p>
10013
     */
10014 73
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove invalid UTF-8 sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: no language specific rules and plain UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] !== true) {
                // language specific rules need intl: warn and use the generic conversion below
                \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            } else {
                // lazy-load the list of available transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $transliteratorId = $lang . '-Lower';
                if (!\in_array($transliteratorId, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // unknown language: use the language independent transliterator
                    $transliteratorId = 'Any-Lower';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($transliteratorId, $str);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
10069
10070
    /**
10071
     * Make a string uppercase.
10072
     *
10073
     * @see http://php.net/manual/en/function.mb-strtoupper.php
10074
     *
10075
     * @param string      $str                   <p>The string being uppercased.</p>
10076
     * @param string      $encoding              [optional] <p>Set the charset.</p>
10077
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
10078
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
10079
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
10080
     *
10081
     * @return string
10082
     *                <p>String with all alphabetic characters converted to uppercase.</p>
10083
     */
10084 17
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove invalid UTF-8 sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: no language specific rules and plain UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of available transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // unknown language: use the language independent transliterator
                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            // language specific rules need intl: warn and use the generic conversion below
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10139
10140
    /**
10141
     * Translate characters or replace sub-strings.
10142
     *
10143
     * @see http://php.net/manual/en/function.strtr.php
10144
     *
10145
     * @param string          $str  <p>The string being translated.</p>
10146
     * @param string|string[] $from <p>The string replacing from.</p>
10147
     * @param string|string[] $to   [optional] <p>The string being translated to to.</p>
10148
     *
10149
     * @return string
10150
     *                This function returns a copy of str, translating all occurrences of each character in from to the
10151
     *                corresponding character in to
10152
     */
10153 2
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // replacing something with itself is a no-op
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // split both sides into single characters ...
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);
            $countFrom = \count($fromChars);
            $countTo = \count($toChars);

            // ... and cut the longer list, so every "from" char has exactly one "to" char
            if ($countFrom > $countTo) {
                $fromChars = \array_slice($fromChars, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toChars = \array_slice($toChars, 0, $countFrom);
            }

            // keep the result in its own variable, so the exception message
            // can still show the inputs instead of the "false" result
            $pairs = \array_combine($fromChars, $toChars);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
            }

            $from = $pairs;
        }

        // a plain string without a replacement: remove every occurrence of it
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        // otherwise $from is a map of replacement pairs
        return \strtr($str, $from);
    }
10187
10188
    /**
10189
     * Return the width of a string.
10190
     *
10191
     * @param string $str       <p>The input string.</p>
10192
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
10193
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10194
     *
10195
     * @return int
10196
     */
10197 2
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the character ranges below are expressed as Unicode code points,
        // so the input is converted to UTF-8 first
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the characters matched by the ranges below and count how many were removed
        $wideCount = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

        // every removed character counts as two columns, every remaining one as a single column
        return ($wideCount * 2) + (int) self::strlen($remaining, 'UTF-8');
    }
10238
10239
    /**
10240
     * Get part of a string.
10241
     *
10242
     * @see http://php.net/manual/en/function.mb-substr.php
10243
     *
10244
     * @param string $str       <p>The string being checked.</p>
10245
     * @param int    $offset    <p>The first position used in str.</p>
10246
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
10247
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
10248
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10249
     *
10250
     * @return false|string
10251
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10252
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10253
     *                      characters long, <b>FALSE</b> will be returned.
10254
     */
10255 172
    public static function substr(
10256
        string $str,
10257
        int $offset = 0,
10258
        int $length = null,
10259
        string $encoding = 'UTF-8',
10260
        bool $cleanUtf8 = false
10261
    ) {
10262
        // empty string
10263 172
        if ($str === '' || $length === 0) {
10264 8
            return '';
10265
        }
10266
10267 168
        if ($cleanUtf8 === true) {
10268
            // iconv and mbstring are not tolerant to invalid encoding
10269
            // further, their behaviour is inconsistent with that of PHP's substr
10270 2
            $str = self::clean($str);
10271
        }
10272
10273
        // whole string
10274 168
        if (!$offset && $length === null) {
10275 7
            return $str;
10276
        }
10277
10278 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
10279 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
10280
        }
10281
10282
        //
10283
        // fallback via mbstring
10284
        //
10285
10286 163
        if (self::$SUPPORT['mbstring'] === true) {
10287 161
            if ($encoding === 'UTF-8') {
10288 161
                if ($length === null) {
10289 64
                    return \mb_substr($str, $offset);
10290
                }
10291
10292 102
                return \mb_substr($str, $offset, $length);
10293
            }
10294
10295
            return self::substr($str, $offset, $length, $encoding);
10296
        }
10297
10298
        //
10299
        // fallback for binary || ascii only
10300
        //
10301
10302
        if (
10303 4
            $encoding === 'CP850'
10304
            ||
10305 4
            $encoding === 'ASCII'
10306
        ) {
10307
            if ($length === null) {
10308
                return \substr($str, $offset);
10309
            }
10310
10311
            return \substr($str, $offset, $length);
10312
        }
10313
10314
        // otherwise we need the string-length
10315 4
        $str_length = 0;
10316 4
        if ($offset || $length === null) {
10317 4
            $str_length = self::strlen($str, $encoding);
10318
        }
10319
10320
        // e.g.: invalid chars + mbstring not installed
10321 4
        if ($str_length === false) {
10322
            return false;
10323
        }
10324
10325
        // empty string
10326 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
10327
            return '';
10328
        }
10329
10330
        // impossible
10331 4
        if ($offset && $offset > $str_length) {
10332
            return '';
10333
        }
10334
10335 4
        if ($length === null) {
10336 4
            $length = (int) $str_length;
10337
        } else {
10338 2
            $length = (int) $length;
10339
        }
10340
10341
        if (
10342 4
            $encoding !== 'UTF-8'
10343
            &&
10344 4
            self::$SUPPORT['mbstring'] === false
10345
        ) {
10346 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
10347
        }
10348
10349
        //
10350
        // fallback via intl
10351
        //
10352
10353
        if (
10354 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
10355
            &&
10356 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
10357
            &&
10358 4
            self::$SUPPORT['intl'] === true
10359
        ) {
10360
            $returnTmp = \grapheme_substr($str, $offset, $length);
10361
            if ($returnTmp !== false) {
10362
                return $returnTmp;
10363
            }
10364
        }
10365
10366
        //
10367
        // fallback via iconv
10368
        //
10369
10370
        if (
10371 4
            $length >= 0 // "iconv_substr()" can't handle negative length
10372
            &&
10373 4
            self::$SUPPORT['iconv'] === true
10374
        ) {
10375
            $returnTmp = \iconv_substr($str, $offset, $length);
10376
            if ($returnTmp !== false) {
10377
                return $returnTmp;
10378
            }
10379
        }
10380
10381
        //
10382
        // fallback for ascii only
10383
        //
10384
10385 4
        if (self::is_ascii($str)) {
10386
            return \substr($str, $offset, $length);
10387
        }
10388
10389
        //
10390
        // fallback via vanilla php
10391
        //
10392
10393
        // split to array, and remove invalid characters
10394 4
        $array = self::str_split($str);
10395
10396
        // extract relevant part, and join to make sting again
10397 4
        return \implode('', \array_slice($array, $offset, $length));
10398
    }
10399
10400
    /**
     * Multi-byte aware comparison of two strings from an offset, up to length characters.
     *
     * If an offset or a length is given, $str1 is first reduced to the requested
     * part and $str2 is cut to the same number of characters; the two results are
     * then compared via "UTF8::strcasecmp()" or "UTF8::strcmp()".
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // measure $str1 in the requested encoding, so that the character count
                // used to cut $str2 matches the encoding used by the cut itself
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10453
10454
    /**
     * Count the number of substring occurrences.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack  <p>The string to search in.</p>
     * @param string $needle    <p>The substring to search for.</p>
     * @param int    $offset    [optional] <p>The offset where to start counting.</p>
     * @param int    $length    [optional] <p>
     *                          The maximum length after the specified offset to search for the
     *                          substring. A negative length is handed to "mb_substr()" and therefore
     *                          counts from the end of the haystack.
     *                          </p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   the number of occurrences, or <strong>false</strong> if the haystack or the
     *                   needle is empty or the length of the haystack can't be determined
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);

            // the clean-up may have removed everything, re-check so that
            // an empty needle never reaches "mb_substr_count()"
            if ($haystack === '' || $needle === '') {
                return false;
            }
        }

        // INFO: "$length === 0" was already handled above, so "!== null" additionally
        // covers negative lengths, which were silently ignored for "$offset === 0"
        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $lengthTmp = self::strlen($haystack, $encoding);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via regex: one match-set per occurrence
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10535
10536
    /**
     * Count the number of substring occurrences, processed in bytes.
     *
     * Without "mbstring.func_overload" the call is passed to the native "substr_count()".
     * With the overload active, the haystack is cut down first and then counted via
     * "mb_substr_count()" in the 8-bit "CP850" encoding.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string being found.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The offset where to start counting
     *                         </p>
     * @param int    $length   [optional] <p>
     *                         The maximum length after the specified offset to search for the
     *                         substring.
     *                         </p>
     *
     * @return false|int
     *                   the number of times the needle substring occurs in the haystack string;
     *                   <strong>0</strong> if the haystack or the needle is empty
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        $funcOverloadUsed = self::$SUPPORT['mbstring_func_overload'] === true;

        // the common case: no overload, the native function already works on bytes
        if ($funcOverloadUsed === false) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }

                $length = (int) $haystackLength;
            }

            if ($length !== 0 && $offset !== 0 && ($length + $offset) <= 0) {
                // output from "substr_count()" have changed in PHP 7.1
                if (Bootup::is_php('7.1') === false) {
                    return false;
                }
            }

            $part = \substr($haystack, $offset, $length);
            $haystack = $part === false ? '' : (string) $part;
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
10613
10614
    /**
     * Counts how often $substring occurs in the given string.
     *
     * The search is case-sensitive by default; pass false as $caseSensitive
     * to ignore the case.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int the number of occurrences, <strong>0</strong> if one of the strings is empty
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // fast path for the default encoding
        if ($encoding === 'UTF-8') {
            if ($caseSensitive === false) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($caseSensitive === false) {
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10660
10661
    /**
     * Strips the prefix ($needle) from the start of the string ($haystack), ignoring the case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack if it doesn't start with it
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to search in || nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10685
10686
    /**
     * Get part of a string process in bytes.
     *
     * @param string $str    <p>The string being checked.</p>
     * @param int    $offset <p>The first position used in str.</p>
     * @param int    $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. The result comes directly from "substr()"
     *                      (or "mb_substr()" if "mbstring.func_overload" is used), so whether an
     *                      out-of-range <i>offset</i> yields <b>FALSE</b> or an empty string
     *                      depends on the PHP version.
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if ($offset === 0 && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no length given: take everything up to the end of the string, instead of
        // a hard-coded 32-bit limit that would cut results longer than 2 GiB
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10717
10718
    /**
     * Strips the suffix ($needle) from the end of the string ($haystack), ignoring the case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string the haystack without the suffix, or the unchanged haystack if it doesn't end with it
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to search in || nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // number of characters that remain once the suffix is gone
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keepLength);
    }
10742
10743
    /**
     * Strips the prefix ($needle) from the start of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack if it doesn't start with it
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to search in || nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        return self::str_starts_with($haystack, $needle) === true
            ? (string) \mb_substr($haystack, (int) self::strlen($needle))
            : $haystack;
    }
10767
10768
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * If $str is an array, every element is processed by its own call of this method
     * (with the same $encoding) and an array is returned.
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.<br>
     *                                     INFO: if $str is an array and no length is given, a length of 0 is
     *                                     used for every element.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array, but $str is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    // non-integer offsets fall back to 0
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    // non-integer lengths fall back to the number of input strings
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call: the closure hands over the encoding, which would
            // otherwise be reset to the default value for every element
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        // "array_map()" pads shorter arrays with null: treat a missing offset as 0,
        // so that it is never handed to "mb_substr()" as a "null" length
        $offset = (int) $offset;

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // convert the offset into a position inside of [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // convert the length into a non-negative number of characters
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // part before the replaced area + replacement + part after the replaced area
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into arrays of characters
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10918
10919
    /**
     * Strips the suffix ($needle) from the end of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the haystack without the suffix, or the unchanged haystack if it doesn't end with it
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in || nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise check, whether the haystack ends with the needle
        $endsWithNeedle = \substr($haystack, -\strlen($needle)) === $needle;
        if ($endsWithNeedle === false) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        $keepLength = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keepLength, $encoding);
    }
10960
10961
    /**
     * Returns a case swapped version of the string.
     *
     * The fast path combines the lower-cased string, the upper-cased string and the
     * input via a byte-wise XOR. That only works if all three strings have the same
     * byte length, so the characters are swapped one by one whenever a case mapping
     * changes the length (e.g. "ß" -> "SS").
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = (string) self::strtolower($str, $encoding);
            $upper = (string) self::strtoupper($str, $encoding);
        }

        // fast path: "lower ^ upper ^ input" returns the respective other case per byte,
        // but a XOR of strings is cut to the shortest operand
        $byteLength = \strlen($str);
        if (\strlen($lower) === $byteLength && \strlen($upper) === $byteLength) {
            return (string) ($lower ^ $upper ^ $str);
        }

        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // the string can't be split (e.g. invalid UTF-8): keep the result of the fast path
            return (string) ($lower ^ $upper ^ $str);
        }

        // slow path: a character that is changed by lower-casing gets lower-cased,
        // every other character gets upper-cased
        $swapped = '';
        foreach ($chars as $char) {
            $charLower = $isUtf8 ? \mb_strtolower($char) : (string) self::strtolower($char, $encoding);
            if ($charLower !== $char) {
                $swapped .= $charLower;

                continue;
            }

            $swapped .= $isUtf8 ? \mb_strtoupper($char) : (string) self::strtoupper($char, $encoding);
        }

        return $swapped;
    }
10988
10989
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed if the functions of "mbstring" or "iconv" exist,
     * although the extension itself isn't loaded.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconvPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringPolyfilled || $iconvPolyfilled;
    }
11012
11013
    /**
     * Replaces every tab in the string by $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>The number of spaces used for one tab. Default: 4</p>
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
11031
11032
    /**
     * Title-cases the string.
     *
     * Without a language and without the "keep string length" option the work is done
     * by "mb_convert_case()", otherwise it is passed on to "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // special handling requested -> "mb_convert_case()" alone isn't enough
        $needsSpecialHandling = $lang !== null || $tryToKeepStringLength !== false;
        if ($needsSpecialHandling) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE, self::normalize_encoding($encoding, 'UTF-8'));
    }
11069
11070
    /**
     * Deprecated alias: passes all arguments on to "UTF8::to_ascii()".
     *
     * @param string $str
     * @param string $subst_chr
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11086
11087
    /**
     * Deprecated alias: passes the input on to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11101
11102
    /**
     * Deprecated alias: passes the input on to "UTF8::to_latin1()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_latin1()
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
11116
11117
    /**
     * Deprecated alias: passes the input on to "UTF8::to_utf8()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11131
11132
    /**
     * Convert a string into ASCII.
     *
     * The string is returned untouched when it is already pure ASCII. Otherwise it is passed through
     * "UTF8::clean()", optionally transliterated via PHP-Intl, and every remaining non-ASCII character is
     * replaced through per-bank lookup tables (bank = code point >> 8) that are loaded on demand.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character to use if no ASCII mapping is known. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // per-request cache of the lookup tables, indexed by bank
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];

        /** @noinspection ForeachSourceInspection */
        foreach ($chars as &$c) {
            // reset for every character, otherwise a character that cannot be decoded would silently
            // reuse the code point of the character before it
            $ord = null;

            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            $ordC1 = self::$ORD[$c[1]];

            // 2-byte sequence
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                // 3-byte sequence
                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    // 4-byte sequence
                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        // 5-byte sequence
                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            // 6-byte sequence
                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            if ($ordC0 === 254 || $ordC0 === 255) {
                $c = $unknown;

                continue;
            }

            if ($ord === null) {
                $c = $unknown;

                continue;
            }

            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    // remember the miss, so the lookup is not repeated for this bank
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            /** @noinspection NullCoalescingOperatorCanBeUsedInspection */
            if (isset($UTF8_TO_ASCII[$bank][$newchar])) {

                // keep for debugging
                /*
                echo "file: " . sprintf('x%02x', $bank) . "\n";
                echo "char: " . $c . "\n";
                echo "ord: " . $ord . "\n";
                echo "newchar: " . $newchar . "\n";
                echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
                echo "bank:" . $bank . "\n\n";
                 */

                $c = $UTF8_TO_ASCII[$bank][$newchar];
            } else {

                // keep for debugging missing chars
                /*
                echo "file: " . sprintf('x%02x', $bank) . "\n";
                echo "char: " . $c . "\n";
                echo "ord: " . $ord . "\n";
                echo "newchar: " . $newchar . "\n";
                echo "bank:" . $bank . "\n\n";
                 */

                $c = $unknown;
            }
        }
        // break the reference, so "$c" can not modify the last element by accident
        unset($c);

        return \implode('', $chars);
    }
11293
11294
    /**
     * Interpret a value as a boolean.
     *
     * The value is cast to a string first. "true", "1", "on" and "yes" give true, "false", "0", "off" and "no"
     * give false (compared as given, then lower-cased). Any other numeric string is true only if it is greater
     * than zero, and every remaining value is the boolean cast of its trimmed form.
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $truthy = ['true', '1', 'on', 'yes'];
        $falsy = ['false', '0', 'off', 'no'];

        // first the value as given, then its lower-case form
        foreach ([$value, \strtolower($value)] as $candidate) {
            if (\in_array($candidate, $truthy, true)) {
                return true;
            }

            if (\in_array($candidate, $falsy, true)) {
                return false;
            }
        }

        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        return (bool) \trim($value);
    }
11335
11336
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Every character that is not a letter (a-z, A-Z), a digit, ".", "-", whitespace or part of
     * "$fallback_char" is removed. Whitespace runs are then replaced by "$fallback_char", repeated fallback
     * characters are collapsed and the result is trimmed of "$fallback_char" on both ends.
     *
     * @param string $string
     * @param bool   $use_transliterate [optional] <p>If true, the string is passed through
     *                                  "UTF8::str_transliterate()" first. By default no transliteration is
     *                                  done and unsafe characters are simply removed.</p>
     * @param string $fallback_char     [optional] <p>Replacement for whitespace. An empty string removes
     *                                  whitespace entirely.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\\.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars
            '/[\\s]+/u',                                           // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // An empty fallback would produce the invalid character class "[]+": the whole "preg_replace()"
        // call would fail and the result would be an empty string. There is nothing to collapse anyway.
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/u'; // 3) remove double $fallback_char's
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        if ($fallback_char === '') {
            return $string;
        }

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11371
11372
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept); everything else is cast to a
     * string and decoded via "UTF8::utf8_decode()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            return \array_map(
                static function ($item) {
                    return self::to_iso8859($item);
                },
                $str
            );
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
11396
11397
    /**
     * Alias that forwards to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[] whatever "UTF8::to_iso8859()" returns for the given input
     *
     * @see UTF8::to_iso8859()
     */
    public static function to_latin1($str)
    {
        // pure pass-through
        $converted = self::to_iso8859($str);

        return $converted;
    }
11410
11411
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        if (\is_array($str) === true) {
            foreach (\array_keys($str) as $key) {
                $str[$key] = self::to_utf8($str[$key], $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $result = '';

        for ($pos = 0; $pos < $length; ++$pos) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                if (($lead & "\xC0") === "\x80") {
                    // stray continuation byte (0x80 - 0xBF): needs conversion
                    $result .= self::to_utf8_convert_helper($lead);
                } else {
                    // it doesn't need conversion
                    $result .= $lead;
                }

                continue;
            }

            // how many continuation bytes does this lead byte announce?
            if ($lead <= "\xDF") {
                $trailing = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $trailing = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $trailing = 3; // looks like 4 bytes UTF8
            } else {
                $trailing = 0; // doesn't look like UTF8, but should be converted
            }

            // every announced byte must exist and must be a continuation byte (0x80 - 0xBF)
            $looksLikeUtf8 = $trailing > 0;
            for ($offset = 1; $looksLikeUtf8 && $offset <= $trailing; ++$offset) {
                $next = $pos + $offset >= $length ? "\x00" : $str[$pos + $offset];
                $looksLikeUtf8 = $next >= "\x80" && $next <= "\xBF";
            }

            if ($looksLikeUtf8) {
                // yeah, almost sure it's UTF8 already: copy the whole sequence and skip it
                $result .= \substr($str, $pos, $trailing + 1);
                $pos += $trailing;
            } else {
                // not valid UTF8 - convert the lead byte only
                $result .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $codePoint = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high = (int) \hexdec($matches[1]);
                    $low = (int) \hexdec($matches[2]);
                    $codePoint = ($high << 10) + $low + 0x10000 - (0xD800 << 10) - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($codePoint < 0x80) {
                    return (string) self::chr($codePoint);
                }

                if ($codePoint < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($codePoint >> 6)) . (string) self::chr(0x80 | ($codePoint & 0x3F));
                }

                return self::decimal_to_chr($codePoint);
            },
            $result
        );

        if ($result === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decodeHtmlEntityToUtf8 === true) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
11544
11545
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We could only use the original function if the string / chars were 7-Bit only,
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null or an empty string
     *                           strips whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // explicit comparison instead of a truthiness check: "0" is a valid character list
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11578
11579
    /**
     * Makes string's first char uppercase.
     *
     * The plain "mb_strtoupper()" is used unless a language or "$tryToKeepStringLength" is given; in that
     * case the first character is upper-cased via "UTF8::strtoupper()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences before the string is split
            $str = self::clean($str);
        }

        $plainMb = $lang === null && $tryToKeepStringLength === false;

        if ($encoding === 'UTF-8') {
            $tail = (string) \mb_substr($str, 1);
            $head = (string) \mb_substr($str, 0, 1);

            if ($plainMb === true) {
                return \mb_strtoupper($head) . $tail;
            }

            return self::strtoupper($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($plainMb === true) {
            $head = (string) \mb_substr($str, 0, 1, $encoding);

            return \mb_strtoupper($head, $encoding) . $tail;
        }

        $head = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
    }
11648
11649
    /**
     * Alias that forwards to "UTF8::ucfirst()".
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Forwarded unchanged.</p>
     * @param bool   $cleanUtf8 [optional] <p>Forwarded unchanged.</p>
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        // pure pass-through, language and keep-length options stay at the defaults of "ucfirst()"
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
11664
11665
    /**
     * Uppercase for all words in the string.
     *
     * PHP's native "ucwords()" is used when neither a charlist nor exceptions are given and the string is
     * pure ASCII. Otherwise the string is split via "UTF8::str_to_words()" and every word that is not listed
     * in "$exceptions" is passed through "UTF8::ucfirst()".
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // explicit comparison: a truthiness check would also swallow the input "0"
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences before the string is split into words
            $str = self::clean($str);
        }

        $usePhpDefaultFunctions = !(bool) ($charlist . \implode('', $exceptions));

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // break the reference, so "$word" can not modify the last element by accident
        unset($word);

        return \implode('', $words);
    }
11726
11727
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without "&", "%", "+" or a "\u" escape there is nothing to decode
        $hasEncodedParts = \strpos($str, '&') !== false
                           || \strpos($str, '%') !== false
                           || \strpos($str, '+') !== false
                           || \strpos($str, '\u') !== false;

        if ($hasEncodedParts === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // decode at least once and, for "$multi_decode", until a pass does not change the string anymore
        while (true) {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entitiesDecoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );
            $str = self::fix_simple_utf8(\urldecode($entitiesDecoded));

            if ($multi_decode !== true || $previous === $str) {
                break;
            }
        }

        return $str;
    }
11784
11785
    /**
     * Return an array with "urlencoded"-win1252 -> UTF-8
     *
     * The keys are the percent-encoded bytes "%20" - "%FF", the values are the matching characters.
     *
     * @return string[]
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        // NOTE(review): the values for %7F, %81, %8D, %8F, %90, %9D, %A0 and %AD are shown as empty strings
        // in the reviewed copy; the literals may contain invisible characters (e.g. DEL, no-break space,
        // soft hyphen) - confirm against the repository before applying.
        return [
            '%20' => ' ',
            '%21' => '!',
            '%22' => '"',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2D' => '-',
            '%2E' => '.',
            '%2F' => '/',
            '%30' => '0',
            '%31' => '1',
            '%32' => '2',
            '%33' => '3',
            '%34' => '4',
            '%35' => '5',
            '%36' => '6',
            '%37' => '7',
            '%38' => '8',
            '%39' => '9',
            '%3A' => ':',
            '%3B' => ';',
            '%3C' => '<',
            '%3D' => '=',
            '%3E' => '>',
            '%3F' => '?',
            '%40' => '@',
            '%41' => 'A',
            '%42' => 'B',
            '%43' => 'C',
            '%44' => 'D',
            '%45' => 'E',
            '%46' => 'F',
            '%47' => 'G',
            '%48' => 'H',
            '%49' => 'I',
            '%4A' => 'J',
            '%4B' => 'K',
            '%4C' => 'L',
            '%4D' => 'M',
            '%4E' => 'N',
            '%4F' => 'O',
            '%50' => 'P',
            '%51' => 'Q',
            '%52' => 'R',
            '%53' => 'S',
            '%54' => 'T',
            '%55' => 'U',
            '%56' => 'V',
            '%57' => 'W',
            '%58' => 'X',
            '%59' => 'Y',
            '%5A' => 'Z',
            '%5B' => '[',
            '%5C' => '\\',
            '%5D' => ']',
            '%5E' => '^',
            '%5F' => '_',
            '%60' => '`',
            '%61' => 'a',
            '%62' => 'b',
            '%63' => 'c',
            '%64' => 'd',
            '%65' => 'e',
            '%66' => 'f',
            '%67' => 'g',
            '%68' => 'h',
            '%69' => 'i',
            '%6A' => 'j',
            '%6B' => 'k',
            '%6C' => 'l',
            '%6D' => 'm',
            '%6E' => 'n',
            '%6F' => 'o',
            '%70' => 'p',
            '%71' => 'q',
            '%72' => 'r',
            '%73' => 's',
            '%74' => 't',
            '%75' => 'u',
            '%76' => 'v',
            '%77' => 'w',
            '%78' => 'x',
            '%79' => 'y',
            '%7A' => 'z',
            '%7B' => '{',
            '%7C' => '|',
            '%7D' => '}',
            '%7E' => '~',
            '%7F' => '',
            // 0x80 is the euro sign in Windows-1252 (it was mapped to a backtick before)
            '%80' => '€',
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
12021
12022
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * Every code point that does not fit into one ISO-8859-1 byte (>= 256, which includes
     * all 3- and 4-byte sequences) is replaced by "?". A 2-byte lead byte that is the very
     * last byte of the input (truncated sequence) is replaced by "?" as well.
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>
     *                              If true, the untouched input is returned whenever the decoded
     *                              result does not contain fewer characters (per self::strlen())
     *                              than the input.
     *                              </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';
        // $i reads the input bytes, $j writes the decoded bytes in place ($j never overtakes $i)
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // 2-byte sequence
                    if ($i + 1 >= $len) {
                        // truncated sequence: the continuation byte is missing, do not read past the end
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one extra byte, then continue like a 3-byte sequence
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte sequence: never representable in ISO-8859-1
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12091
12092
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The encoded string, or an empty string if the encoder reported a failure.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // a polyfilled "utf8_encode()" may return false
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
12116
12117
    /**
     * Fixes broken "utf8-win1252" characters; a plain alias that delegates to "fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12130
12131
    /**
     * Returns the table of utf8 whitespace characters stored in self::$WHITESPACE_TABLE.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12144
12145
    /**
     * Limit the number of words in a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The limit of words as integer; values below 1 yield an empty string.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     *                <p>The input itself if nothing had to be cut off, otherwise the first words plus $strAddOn.</p>
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace followed by up to $limit "word + trailing whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        if (!isset($found[0])) {
            return $str;
        }

        $head = $found[0];

        // the match already covers the whole string -> nothing to shorten
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $strAddOn;
    }
12172
12173
    /**
     * Wraps a string to a given number of characters
     *
     * The input is mirrored into a placeholder string (one single-byte placeholder per
     * character), the native "wordwrap()" is run on that placeholder string and the
     * resulting break positions are then replayed on the real characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>
     *                The given string wrapped at the specified column,
     *                or an empty string if $str or $break is empty.
     *                </p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars holds the real characters, $mask holds exactly one placeholder per entry of $chars:
        // "#" for an existing break, " " for a space and "?" for everything else
        $chars = [];
        $mask = '';
        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $maskLength = \mb_strlen($mask);

        $wrapped = '';
        $charIndex = 0;
        $maskIndex = -1;
        $breakPos = -1;
        while (($breakPos = \mb_strpos($mask, '#', $breakPos + 1)) !== false) {
            // copy everything in front of the next break position
            for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
                $wrapped .= $chars[$charIndex];
                unset($chars[$charIndex++]);

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($maskIndex > $maskLength) {
                    break 2;
                }
            }

            // drop the original break / space that is replaced by the new break
            if ($break === $chars[$charIndex] || $chars[$charIndex] === ' ') {
                unset($chars[$charIndex++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($breakPos > $maskLength) {
                break;
            }
        }

        // whatever is left in $chars follows the last break
        return $wrapped . \implode('', $chars);
    }
12259
12260
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that we wrap the per line.
     *
     * @param string      $str           <p>The input string.</p>
     * @param int         $width         [optional] <p>The column width.</p>
     * @param string      $break         [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut           [optional] <p>
     *                                   If the cut is set to true, the string is
     *                                   always wrapped at or before the specified width. So if you have
     *                                   a word that is larger than the given width, it is broken apart.
     *                                   </p>
     * @param bool        $addFinalBreak [optional] <p>
     *                                   If this flag is true, then the method will add a $break at the end
     *                                   of the result string.
     *                                   </p>
     * @param string|null $delimiter     [optional] <p>
     *                                   You can change the default behavior, where we split the string by newline
     *                                   ("\r\n", "\r" or "\n") and join the wrapped lines with "\n".
     *                                   </p>
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $addFinalBreak = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrappedLines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrappedLines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";

        return \implode($glue, $wrappedLines) . ($addFinalBreak ? $break : '');
    }
12311
12312
    /**
     * Returns the array of Unicode White Space characters stored in self::$WHITESPACE.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12321
12322
    /**
     * Replaces case-folding characters in a string via plain string replacement.
     *
     * The "common" table (self::$COMMON_CASE_FOLD) is always applied; the "full" table
     * (loaded once from the "caseFolding_full" data file) only on request.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        // only a strict boolean true switches to lowercase
        $toLower = $useLower === true;

        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $toLower
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        if ($toLower) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12363
12364
    /**
     * Includes a data file from the "data" directory next to this class and returns its result.
     *
     * @param string $file <p>The file name without the ".php" extension.</p>
     *
     * @return array
     */
    private static function getData(string $file): array
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12378
12379
    /**
     * Includes a data file from the "data" directory next to this class, but only if the file exists.
     *
     * @param string $file <p>The file name without the ".php" extension.</p>
     *
     * @return false|mixed will return false on error
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
12397
12398
    /**
     * Builds the emoji lookup caches on first use.
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // order the table by key length (in bytes), longest keys first
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // one placeholder token per key, derived from the crc32 checksum of the key
        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12428
12429
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @return bool
     *              <p>
     *              true if the "MB_OVERLOAD_STRING" constant exists and its bit is set
     *              in the "mbstring.func_overload" INI value.
     *              </p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        /** @noinspection PhpComposerExtensionStubsInspection */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadFlags = (int) @\ini_get('mbstring.func_overload');

        /** @noinspection PhpComposerExtensionStubsInspection */
        return ($overloadFlags & \MB_OVERLOAD_STRING) !== 0;
    }
12446
12447
    /**
     * Filters a list of strings and returns the remaining values as a re-indexed list.
     *
     * @param array    $strings
     * @param bool     $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
     * @param int|null $removeShortValues <p>Drop values whose length is lower than or equal to this number.</p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $kept = [];

        foreach ($strings as $candidate) {
            $isTooShort = $removeShortValues !== null
                          &&
                          \mb_strlen($candidate) <= $removeShortValues;
            if ($isTooShort) {
                continue;
            }

            $isBlank = $removeEmptyValues === true
                       &&
                       \trim($candidate) === '';
            if ($isBlank) {
                continue;
            }

            $kept[] = $candidate;
        }

        return $kept;
    }
12481
12482
    /**
     * Builds a regex fragment that matches any of the given characters.
     *
     * Short characters end up (quoted) inside one bracket expression together with $class;
     * elements that are longer than one character are added as alternatives in a
     * non-capturing group. Results are cached per "$s . $class" for the current request.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Raw content that is put into the bracket expression as-is.</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASS_CACHE[$cacheKey])) {
            return $RX_CLASS_CACHE[$cacheKey];
        }

        $classArray = [$class];

        // iterate by value with a dedicated variable: the function result is no reference target
        // and the "$s" parameter must not be overwritten by the loop
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal "-" has to be the first char of the bracket expression
                $classArray[0] = '-' . $classArray[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long
                $classArray[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one single multi-byte character
                $classArray[0] .= $char;
            } else {
                $classArray[] = $char;
            }
        }

        if ($classArray[0]) {
            $classArray[0] = '[' . $classArray[0] . ']';
        }

        if (\count($classArray) === 1) {
            $return = $classArray[0];
        } else {
            $return = '(?:' . \implode('|', $classArray) . ')';
        }

        $RX_CLASS_CACHE[$cacheKey] = $return;

        return $return;
    }
12530
12531
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The names are split by $delimiter and the first character of every part is upper-cased,
     * except for parts that are a known particle (e.g. "van", "de") or that start with a
     * known prefix (e.g. "mc", "d'"). For the "-" delimiter, parts that start with a known
     * particle are left untouched as well.
     *
     * @param string $names
     * @param string $delimiter
     * @param string $encoding
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        $parts = \explode($delimiter, $names);

        if ($parts === false) {
            return '';
        }

        // particles that stay lowercase
        $particles = [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ];

        // name beginnings that stay lowercase
        $prefixes = [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ];

        foreach ($parts as $index => $part) {
            if (\in_array($part, $particles, true)) {
                continue;
            }

            $skip = false;

            if ($delimiter === '-') {
                foreach ($particles as $particle) {
                    if (self::strpos($part, $particle, 0, $encoding) === 0) {
                        $skip = true;
                    }
                }
            }

            foreach ($prefixes as $prefix) {
                if (self::strpos($part, $prefix, 0, $encoding) === 0) {
                    $skip = true;
                }
            }

            if ($skip) {
                continue;
            }

            $parts[$index] = self::ucfirst($part);
        }

        return \implode($delimiter, $parts);
    }
12621
12622
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and all non-spacing marks ("\p{Mn}") are removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12637
12638
    /**
     * Converts one single byte into its UTF-8 representation.
     *
     * Bytes listed in the "win1252_to_utf8" table are mapped via that table,
     * every other byte is encoded as a 2-byte UTF-8 sequence.
     *
     * @param int|string $input
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // The bitwise operators are intentional: they work byte-wise on the one-char strings.
            // ">> 6" is the integer form of "/ 64", so the array key is never a float.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
            $cc2 = (((string) $input) & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12672
12673
    /**
     * Rewrites "%uXXXX" escapes (3 or 4 hex digits) into the HTML entity form "&#xXXXX;".
     *
     * @param string $str
     *
     * @return string
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        $unicodeEscape = '/%u([0-9a-fA-F]{3,4})/';

        // nothing to rewrite -> hand the input back untouched
        if (!\preg_match($unicodeEscape, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicodeEscape, '&#x\\1;', $str);
    }
12687
}
12688