Passed
Push — master ( 2cbebb...3870cb )
by Lars
03:52
created

UTF8   F

Complexity

Total Complexity 1711

Size/Duplication

Total Lines 12665
Duplicated Lines 0 %

Test Coverage

Coverage 79.69%

Importance

Changes 90
Bugs 51 Features 5
Metric Value
eloc 4365
c 90
b 51
f 5
dl 0
loc 12665
ccs 3069
cts 3851
cp 0.7969
rs 0.8
wmc 1711

298 Methods

Rating   Name   Duplication   Size   Complexity  
A ctype_loaded() 0 3 1
A decode_mimeheader() 0 15 5
A css_stripe_media_queries() 0 6 1
A json_loaded() 0 3 1
A collapse_whitespace() 0 8 2
A max() 0 14 3
A remove_left() 0 21 4
A remove_html() 0 3 1
A replace_all() 0 11 2
A replace() 0 11 2
A remove_right() 0 22 4
A remove_html_breaks() 0 3 1
A regex_replace() 0 20 3
A normalize_line_ending() 0 3 1
A add_bom_to_string() 0 7 2
A chr_to_int() 0 3 1
A __construct() 0 2 1
B between() 0 48 8
A char_at() 0 7 2
A chars() 0 3 1
A access() 0 11 4
A callback() 0 3 1
A bom() 0 3 1
A first_char() 0 11 4
A finfo_loaded() 0 3 1
A chr_to_decimal() 0 30 6
A filter_input() 0 13 2
A array_change_key_case() 0 20 5
D chr() 0 101 18
A chunk_split() 0 3 1
A fix_utf8() 0 30 4
D getCharDirection() 0 105 118
A filter_var_array() 0 9 2
A chr_map() 0 5 1
A fits_inside() 0 3 1
A chr_size_list() 0 17 3
A filter_var() 0 9 2
A fix_simple_utf8() 0 19 4
A checkForSupport() 0 47 4
A filter_input_array() 0 9 2
A getSupportInfo() 0 13 3
A chr_to_hex() 0 11 3
A binary_to_str() 0 12 3
A str_substr_after_first_separator() 0 28 6
A file_has_bom() 0 8 2
A str_begins() 0 3 1
B str_camelize() 0 70 10
A parse_str() 0 16 4
A str_contains() 0 10 2
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A get_unique_string() 0 15 2
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A strnatcasecmp() 0 5 1
A encode_mimeheader() 0 25 5
A substr_left() 0 15 4
A count_chars() 0 11 1
D strlen() 0 96 19
A str_isubstr_last() 0 20 4
A str_replace_beginning() 0 21 6
A has_uppercase() 0 8 2
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 101 20
A to_filename() 0 24 2
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
A isBinary() 0 3 1
C utf8_decode() 0 60 13
A ltrim() 0 19 4
A emoji_decode() 0 16 2
D is_utf8() 0 144 31
B str_longest_common_suffix() 0 51 10
B ucfirst() 0 57 7
A lcword() 0 8 1
A str_pad_both() 0 7 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A html_escape() 0 6 1
A toUTF8() 0 3 1
A string() 0 10 1
C normalize_encoding() 0 134 14
C get_file_type() 0 96 15
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 152 5
C is_utf16() 0 65 16
A isHtml() 0 3 1
C filter() 0 56 13
A normalize_whitespace() 0 30 6
A str_starts_with() 0 11 3
A isBase64() 0 3 1
A str_humanize() 0 15 1
A is_html() 0 14 2
C substr_count_in_byte() 0 54 15
A html_decode() 0 3 1
A strchr() 0 8 1
A strichr() 0 8 1
A isUtf32() 0 3 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 19 4
C str_longest_common_substring() 0 73 16
A titlecase() 0 24 5
A str_iindex_first() 0 11 1
B strtolower() 0 54 10
B urldecode() 0 37 8
A str_isubstr_before_first_separator() 0 16 5
B strrev() 0 43 10
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 12 4
A emoji_encode() 0 16 2
A str_matches_pattern() 0 3 1
A is_alpha() 0 8 2
B str_titleize() 0 55 10
B get_random_string() 0 53 10
A str_replace_first() 0 17 2
A toLatin1() 0 3 1
A str_pad_right() 0 7 1
B ucwords() 0 48 9
A to_boolean() 0 35 5
C stristr() 0 68 15
A isUtf8() 0 3 1
A strncasecmp() 0 10 1
B strwidth() 0 40 8
A str_iends() 0 3 1
A trim() 0 19 4
A clean() 0 47 6
A is_serialized() 0 11 3
A str_upper_camelize() 0 8 1
A is_uppercase() 0 8 2
A substr_compare() 0 33 6
C substr_count() 0 62 16
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A str_ireplace() 0 18 3
A to_latin1() 0 3 1
A str_replace_ending() 0 21 6
A string_has_bom() 0 10 3
B strtr() 0 33 8
A str_contains_all() 0 23 6
A is_ascii() 0 7 2
A str_isubstr_after_last_separator() 0 23 5
B range() 0 41 10
B strspn() 0 30 10
A strcasecmp() 0 5 1
A str_transliterate() 0 3 1
B rawurldecode() 0 37 8
A str_ends() 0 3 1
A utf8_encode() 0 16 3
A normalize_msword() 0 43 2
C str_detect_encoding() 0 76 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A is_blank() 0 8 2
A str_replace() 0 14 1
A substr_iright() 0 15 4
A htmlspecialchars() 0 11 3
A decimal_to_chr() 0 3 1
A to_iso8859() 0 16 4
A words_limit() 0 17 5
A strip_tags() 0 15 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 21 6
D str_truncate_safe() 0 78 18
A codepoints() 0 29 4
A substr_right() 0 31 6
A lowerCaseFirst() 0 8 1
D str_split() 0 125 28
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
A cleanup() 0 25 2
F strrpos() 0 118 25
A showSupport() 0 8 2
A remove_invisible_characters() 0 19 3
A single_chr_html_encode() 0 15 4
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B is_binary() 0 35 9
A intlChar_loaded() 0 3 1
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 11 3
F to_ascii() 0 150 27
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
B str_longest_common_prefix() 0 48 8
A str_pad_left() 0 7 1
A str_substr_first() 0 33 6
A html_stripe_empty_tags() 0 6 1
A remove_bom() 0 21 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 32 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 23 5
F extract_text() 0 175 34
A isBom() 0 3 1
B str_snakeize() 0 55 6
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A str_sort() 0 15 3
D to_utf8() 0 117 35
A ucword() 0 3 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A hasBom() 0 3 1
A toAscii() 0 3 1
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 53 11
A iconv_loaded() 0 3 1
A lcwords() 0 31 6
A str_upper_first() 0 8 1
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A is_empty() 0 3 1
B html_encode() 0 42 7
A str_dasherize() 0 3 1
A isUtf16() 0 3 1
A str_ensure_left() 0 11 3
F encode() 0 139 37
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
C is_utf32() 0 65 16
C ord() 0 72 16
A is_alphanumeric() 0 8 2
A json_decode() 0 14 2
C strcspn() 0 51 12
B is_json() 0 27 8
A int_to_hex() 0 7 2
B str_split_pattern() 0 49 11
D strstr() 0 92 18
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A str_isubstr_first() 0 25 4
A is_base64() 0 16 5
A str_last_char() 0 13 4
A str_ireplace_beginning() 0 21 6
A hex_to_int() 0 14 3
A htmlentities() 0 23 3
A hex_to_chr() 0 3 1
A str_substr_before_first_separator() 0 32 6
F substr() 0 143 32
A isJson() 0 3 1
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 35 5
B str_delimit() 0 33 8
B strtoupper() 0 54 10
A min() 0 14 3
C html_entity_decode() 0 86 17
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 6 1
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A remove_duplicates() 0 14 4
B str_slice() 0 33 10
F strpos() 0 131 27
A str_shuffle() 0 35 6
A strcmp() 0 9 2
B file_get_contents() 0 55 11
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 9 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8
C wordwrap() 0 68 14
B rxClass() 0 39 8
A getData() 0 6 1
A ws() 0 3 1
A urldecode_unicode_helper() 0 8 2
B str_capitalize_name_helper() 0 80 10
A reduce_string_array() 0 26 6
A mbstring_overloaded() 0 11 2
A getDataIfExists() 0 10 2
A strtonatfold() 0 6 1
A fixStrCaseHelper() 0 33 5
A wordwrap_per_line() 0 28 5
A to_utf8_convert_helper() 0 28 5
A initEmojiData() 0 26 4

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
14
     * Bom => Byte-Length
15
     *
16
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
17
     *
18
     * @var array
19
     */
20
    private static $BOM = [
21
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
22
        ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
23
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
24
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
25
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
26
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
27
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
28
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
29
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
30
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
31
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $INTL_TRANSLITERATOR_LIST;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
234
235
    /**
236
     * @var array|null
237
     */
238
    private static $CHR;
239
240
    /**
     * Creates a UTF8 instance.
     *
     * The constructor takes no arguments and performs no work.
     */
    public function __construct()
    {
        // intentionally left empty
    }
246
247
    /**
     * Returns the single character found at a given position, i.e. "$str[1]"-like
     * access that counts characters instead of bytes.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of the character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the character at $pos, or an empty string when $str is empty or $pos is negative
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $isUsableInput = $str !== '' && $pos >= 0;
        if ($isUsableInput === false) {
            return '';
        }

        // "UTF-8" is served directly by mbstring, every other charset goes through self::substr()
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
268
269
    /**
     * Makes sure the string starts with a BOM by prepending the UTF-8 BOM if needed.
     *
     * INFO: If self::string_has_bom() already reports a BOM, the input is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string including a leading BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
286
287
    /**
     * Changes the case of all keys in an array.
     *
     * INFO: Keys that become identical after the conversion overwrite each other,
     *       so the value of the last such key wins.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default); any other value
     *                         is treated like CASE_LOWER</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array a new array with lower- or uppercased keys and the untouched values
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // unknown modes fall back to the default (lowercase)
        if ($case !== \CASE_LOWER && $case !== \CASE_UPPER) {
            $case = \CASE_LOWER;
        }

        $return = [];

        // Iterate by value: nothing is written back into $array, and a by-reference
        // loop variable would stay bound to the last element after the loop.
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
318
319
    /**
     * Extracts the text located between the first $start delimiter and the next
     * $end delimiter following it.
     *
     * An empty string is returned if one of the delimiters cannot be found or if
     * there is nothing between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: normalize the charset name and use the encoding-aware helpers
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $startPos = self::strpos($str, $start, $offset, $encoding);
            if ($startPos === false) {
                return '';
            }

            $contentPos = $startPos + (int) self::strlen($start, $encoding);
            $endPos = self::strpos($str, $end, $contentPos, $encoding);
            if ($endPos === false || $endPos === $contentPos) {
                return '';
            }

            return (string) self::substr($str, $contentPos, $endPos - $contentPos, $encoding);
        }

        // UTF-8 path: talk to mbstring directly
        $startPos = \mb_strpos($str, $start, $offset);
        if ($startPos === false) {
            return '';
        }

        $contentPos = $startPos + (int) \mb_strlen($start);
        $endPos = \mb_strpos($str, $end, $contentPos);
        if ($endPos === false || $endPos === $contentPos) {
            return '';
        }

        return (string) \mb_substr($str, $contentPos, $endPos - $contentPos);
    }
382
383
    /**
     * Converts a string of binary digits ("0" / "1") into the bytes it represents.
     *
     * NOTE: The conversion relies on base_convert(), which the PHP manual documents
     *       as possibly losing precision on large numbers, so very long inputs may
     *       not convert exactly.
     *
     * @param mixed $bin <p>A string consisting of the characters 1|0.</p>
     *
     * @return string the decoded bytes; an empty string if the input has no first
     *                element/character or if it converts to zero
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $convert = \base_convert($bin, 2, 16);
        if ($convert === '0') {
            return '';
        }

        // base_convert() drops leading zeros, so the hex string can have an odd
        // number of digits. pack('H*') would then fill the missing nibble on the
        // right and shift the last nibble into the high half of its byte; pad on
        // the left instead so every byte keeps its value.
        if (\strlen($convert) % 2 !== 0) {
            $convert = '0' . $convert;
        }

        return \pack('H*', $convert);
    }
403
404
    /**
     * Returns the Byte Order Mark for UTF-8.
     *
     * INFO: the private UTF8::$BOM table lists the UTF-16 and UTF-32 BOM values as well
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        // the three bytes EF BB BF
        return "\xef\xbb\xbf";
    }
415
416
    /**
     * Alias of UTF8::chr_map(): all arguments are forwarded unchanged.
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
430
431
    /**
     * Returns the character located at $index; the first character has index 0.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // "UTF-8" is served directly by mbstring, every other charset goes through self::substr()
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
448
449
    /**
     * Splits the string into its characters by delegating to UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
460
461
    /**
     * Detects once which UTF-8 related extensions and features are available and
     * stores the findings in self::$SUPPORT.
     *
     * @return true|null true if the detection was executed by this call, null if it had already been done
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // the detection runs only once
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // the polyfill provides "mb_internal_encoding()" too, so configure it as well
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
516
517
    /**
     * Builds the character that belongs to a code point.
     *
     * The character is generated as UTF-8 and passed through UTF8::encode() when
     * another target encoding is requested. Results are memoized per code point
     * and encoding.
     *
     * INFO: opposite to UTF8::ord()
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // memo of already generated characters
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isBasicEncoding = $encoding === 'UTF-8'
                           || $encoding === 'ISO-8859-1'
                           || $encoding === 'WINDOWS-1252';
        if ($isBasicEncoding === false && self::$SUPPORT['mbstring'] === false) {
            // only a warning: the conversion is still attempted below
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) {
            // use "simple"-char only until "\x80"
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            // fallback via "IntlChar"
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            // fallback via vanilla php: assemble the UTF-8 byte sequence by hand
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            $code_point = (int) $code_point;
            if ($code_point <= 0x7F) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[$code_point];
            } elseif ($code_point <= 0x7FF) {
                // two bytes: lead byte + one continuation byte
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 6) + 0xC0]
                       . self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // three bytes: lead byte + two continuation bytes
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 12) + 0xE0]
                       . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                       . self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                // four bytes: lead byte + three continuation bytes
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 18) + 0xF0]
                       . self::$CHR[(($code_point >> 12) & 0x3F) + 0x80]
                       . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                       . self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
629
630
    /**
     * Runs a callback on every character of a string and collects the results.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
645
646
    /**
     * Lists how many bytes each character of a Unicode string occupies.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byteLength = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byteLength = '\strlen';
        }

        return \array_map($byteLength, self::str_split($str));
    }
676
677
    /**
     * Decodes the first UTF-8 encoded character of the input into its decimal code.
     *
     * The lead byte determines how many bytes are read; the payload bits of the
     * following bytes are appended six bits at a time.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        $code = self::ord($char[0]);

        if (($code & 0x80) === 0) {
            // 0xxxxxxx
            return $code;
        }

        // strip the length marker from the lead byte and remember the sequence length
        $length = 1;
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $length = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $length = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $length = 4;
            $code &= ~0xf0;
        }

        for ($offset = 1; $offset < $length; ++$offset) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $code;
    }
715
716
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional] <p>Prefix handed to "UTF8::int_to_hex()".</p>
     *
     * @return string The code point encoded as U+xxxx, or an empty string for an empty input
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the HTML entity "&#0;" is looked up like an empty character
        $input = $char === '&#0;' ? '' : (string) $char;

        $codePoint = self::ord($input);

        return self::int_to_hex($codePoint, $pfix);
    }
736
737
    /**
     * Alias of "UTF8::chr_to_decimal()".
     *
     * @param string $chr <p>The input character.</p>
     *
     * @return int the value returned by "UTF8::chr_to_decimal()"
     *
     * @see UTF8::chr_to_decimal()
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
750
751
    /**
     * Splits a string into smaller chunks and joins them with the specified line ending character(s).
     *
     * INFO: the chunks are glued together with "implode()", so $end is placed between
     *       two chunks only and is not appended after the last chunk.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
764
765
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps are applied in this order: diamond question mark, invisible characters,
     * whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences and is written back via "$1";
        // the single bytes matched by the other two groups are dropped.
        $pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $str = (string) \preg_replace($pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $str = self::replace_diamond_question_mark($str, '');
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str);
        }

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom) {
            $str = self::remove_bom($str);
        }

        return $str;
    }
833
834
    /**
     * Cleans up a string: repairs simple UTF-8 encoding errors first and then runs "UTF8::clean()" on the result.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the cleaned string, or an empty string for an empty input
     */
    public static function cleanup($str): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $repaired = self::fix_simple_utf8($str);

        // remove all none UTF-8 symbols and ...
        return self::clean(
            $repaired,
            true,  // ... remove BOM
            true,  // ... normalize whitespace chars
            false, // ... leave MS Word chars untouched
            true,  // ... but keep non-breaking-spaces
            true,  // ... remove diamond question mark (�)
            true   // ... remove invisible characters (e.g. "\0")
        );
    }
868
869
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        // a plain string is split into its characters, an array is used as given
        $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

        $codePoints = \array_map([self::class, 'ord'], $chars);

        if ($codePoints === []) {
            return [];
        }

        if (!$u_style) {
            return $codePoints;
        }

        // convert every integer into its hexadecimal notation
        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
913
914
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);
        } else {
            // without mbstring the same pattern goes through "UTF8::regex_replace()"
            $collapsed = self::regex_replace($str, '[[:space:]]+', ' ');
        }

        return \trim($collapsed);
    }
932
933
    /**
     * Returns count of characters used in a string.
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Handed through to "UTF8::str_split()" as its last
     *                                   argument.</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        // one array entry per character
        $characters = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return \array_count_values($characters);
    }
957
958
    /**
     * Remove css media-queries.
     *
     * @param string $str <p>The css input.</p>
     *
     * @return string the input without the matched "@media" blocks
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $stripped;
    }
973
974
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
984
985
    /**
     * Converts an int-value into an UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#...;") which is then decoded.
     *
     * @param mixed $int
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $entity = '&#' . $int . ';';

        return self::html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5);
    }
996
997
    /**
     * Decodes a MIME header field
     *
     * @param string $str
     * @param string $encoding [optional] <p>The charset: passed to "iconv_mime_decode()" when iconv is
     *                         supported, otherwise used to re-encode $str before "mb_decode_mimeheader()".</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        $isCommonEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isCommonEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // preferred way: let iconv decode the header
        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // fallback: re-encode first (non UTF-8 only), then decode via mbstring
        $input = $encoding !== 'UTF-8' ? self::encode($encoding, $str) : $str;

        return \mb_decode_mimeheader($input);
    }
1023
1024
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                        <p>The input string.</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // pick the key list that matches the mapping used while encoding
        $placeholders = $useReversibleStringMapping
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $placeholders,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1052
1053
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * @param string $str                        <p>The input string</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           when <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // pick the key list the emoji values are replaced with
        $placeholders = $useReversibleStringMapping
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $placeholders,
            $str
        );
    }
1081
1082
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
     *        encoding, so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * Besides charsets, the pseudo encodings 'JSON', 'BASE64' and 'HTML-ENTITIES' are handled as source and as
     * target encoding.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>When true (or when $fromEncoding is empty) the current
     *                                       string-encoding is detected via "UTF8::str_detect_encoding()" and a
     *                                       detected encoding replaces $fromEncoding; when true and nothing is
     *                                       detected, the result of "UTF8::to_utf8()" is returned.</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       An empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if $toEncoding is 'JSON' and the string can not be json-encoded
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        // nothing to convert
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // source and target are already the same
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        // --- pseudo encoding: JSON
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        // --- pseudo encoding: BASE64
        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        // --- pseudo encoding: HTML-ENTITIES
        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        $fromEncodingDetected = false;
        if ($autodetectFromEncoding || !$fromEncoding) {
            $fromEncodingDetected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $fromEncodingDetected, $str, "\n\n");

        if ($fromEncodingDetected !== false) {
            $fromEncoding = $fromEncodingDetected;
        } elseif ($autodetectFromEncoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        if (
            $toEncoding === 'UTF-8'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        if (
            !\in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            if ($converted) {
                return $converted;
            }
        }

        // last attempt via iconv, otherwise hand back the string as it is
        $viaIconv = \iconv($fromEncoding, $toEncoding, $str);
        if ($viaIconv !== false) {
            return $viaIconv;
        }

        return $str;
    }
1240
1241
    /**
     * Encodes a string as a MIME header field value via "iconv_mime_encode()" (with an empty field name).
     *
     * @param string $str
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed, default: CR LF.</p>
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        // double quotes on purpose: the single-quoted '\\r\\n' is the four literal
        // characters backslash, "r", backslash, "n" and not a CR LF line break
        $linefeed = "\r\n",
        $indent = 76
    ) {
        if ($fromCharset !== 'UTF-8' && $fromCharset !== 'CP850') {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if ($toCharset !== 'UTF-8' && $toCharset !== 'CP850') {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        return \iconv_mime_encode(
            '',
            $str,
            [
                'scheme'           => $transferEncoding,
                'line-length'      => $indent,
                'input-charset'    => $fromCharset,
                'output-charset'   => $toCharset,
                'line-break-chars' => $linefeed,
            ]
        );
    }
1281
1282
    /**
     * Create an extract from a sentence, so if the search-string was found, it tries to center it in the output.
     *
     * The extract is cut at " " or "." characters; skipped text is replaced by $replacerForSkippedText.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // UTF-8 goes straight to the "mb_" functions, everything else through the class helpers
        $isUtf8 = $encoding === 'UTF-8';

        // Smaller one of the positions of the next " " and the next "." starting at $offset.
        // NOTE: the raw search results (which may be false) go through \min() unchanged and the
        //       outcome is cast to int, exactly as the inline code did before.
        $nextBreak = static function (int $offset) use ($str, $encoding, $isUtf8): int {
            if ($isUtf8) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        // Sub-string of $str (the result may be false, callers check for that).
        $slice = static function (int $start, int $count) use ($str, $encoding, $isUtf8) {
            if ($isUtf8) {
                return \mb_substr($str, $start, $count);
            }

            return self::substr($str, $start, $count, $encoding);
        };

        // ---------------------------------------------------------------
        // no search-string: cut the text at the first break after $length
        // ---------------------------------------------------------------
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $stringLength = $isUtf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
                $end = \min($length - 1, $stringLength);
            }

            $pos = $nextBreak($end);
            if ($pos === 0) {
                return $str;
            }

            $strSub = $slice(0, $pos);
            if ($strSub === false) {
                return '';
            }

            return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        }

        // ---------------------------------------------------------------
        // with search-string: try to center the match in the extract
        // ---------------------------------------------------------------
        if ($isUtf8) {
            $wordPos = (int) \mb_stripos($str, $search);
            $halfSide = (int) ($wordPos - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $wordPos = (int) self::stripos($str, $search, 0, $encoding);
            $halfSide = (int) ($wordPos - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // start the extract at the last break in front of the centered window
        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = $slice(0, $halfSide);
            if ($halfText !== false) {
                if ($isUtf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($halfText, ' '),
                        \mb_strrpos($halfText, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($halfText, ' ', 0, $encoding),
                        self::strrpos($halfText, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($wordPos && $halfSide > 0) {
            $realLength = (int) self::strlen($str, $encoding);
            $offset = \min($pos_start + $length - 1, $realLength);

            $pos_end = $nextBreak($offset) - $pos_start;

            if ($pos_end <= 0) {
                // no usable break behind the window: take everything from the start position
                $fullLength = $isUtf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
                $strSub = $slice($pos_start, $fullLength);

                return $strSub !== false
                    ? $replacerForSkippedText . \ltrim($strSub, $trimChars)
                    : '';
            }

            $strSub = $slice($pos_start, $pos_end);

            return $strSub !== false
                ? $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText
                : '';
        }

        // the match sits at the very beginning (or was not found): cut from the start of the text
        $trueLength = (int) self::strlen($str, $encoding);
        $offset = \min($length - 1, $trueLength);

        $pos_end = $nextBreak($offset);
        if ($pos_end === 0) {
            return $str;
        }

        $strSub = $slice(0, $pos_end);

        return $strSub !== false
            ? \rtrim($strSub, $trimChars) . $replacerForSkippedText
            : '';
    }
1469
1470
    /**
1471
     * Reads entire file into a string.
1472
     *
1473
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1474
     *
1475
     * @see http://php.net/manual/en/function.file-get-contents.php
1476
     *
1477
     * @param string        $filename         <p>
1478
     *                                        Name of the file to read.
1479
     *                                        </p>
1480
     * @param bool          $use_include_path [optional] <p>
1481
     *                                        Prior to PHP 5, this parameter is called
1482
     *                                        use_include_path and is a bool.
1483
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1484
     *                                        to trigger include path
1485
     *                                        search.
1486
     *                                        </p>
1487
     * @param resource|null $context          [optional] <p>
1488
     *                                        A valid context resource created with
1489
     *                                        stream_context_create. If you don't need to use a
1490
     *                                        custom context, you can skip this parameter by &null;.
1491
     *                                        </p>
1492
     * @param int|null      $offset           [optional] <p>
1493
     *                                        The offset where the reading starts.
1494
     *                                        </p>
1495
     * @param int|null      $maxLength        [optional] <p>
1496
     *                                        Maximum length of data read. The default is to read until end
1497
     *                                        of file is reached.
1498
     *                                        </p>
1499
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1500
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1501
     *                                        some files, because they used non default utf-8 chars. Binary files
1502
     *                                        like images or pdf will not be converted.</p>
1503
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1504
     *                                        An empty string will trigger the autodetect anyway.</p>
1505
     *
1506
     * @return false|string the function returns the read data as string or <b>false</b> on failure
1507
     */
1508 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // Run the name through the string sanitizer first; if the filter
        // itself fails there is nothing sensible to open.
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and
        // rewrites quotes / strips tags inside the name - confirm this is wanted.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        // Without a caller-supplied context, create one that carries the
        // timeout (a timeout of 0 disables this).
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        // The native function receives the length argument only when the
        // caller actually passed one; a missing offset means "from the start".
        $arguments = [$filename, $use_include_path, $context, $offset ?? 0];
        if (\is_int($maxLength)) {
            $arguments[] = $maxLength;
        }

        $data = \file_get_contents(...$arguments);
        if ($data === false) {
            // read error
            return false;
        }

        if ($convertToUtf8 !== true) {
            return $data;
        }

        // Convert when the strict binary check does not flag the data, or
        // when the data is recognised as UTF-16 / UTF-32.
        $needs_conversion = self::is_binary($data, true) !== true
            || self::is_utf16($data, false) !== false
            || self::is_utf32($data, false) !== false;

        if ($needs_conversion) {
            $data = self::encode('UTF-8', $data, false, $fromEncoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1564
1565
    /**
     * Tells whether the content of a file begins with a BOM (Byte Order Mark).
     *
     * The file is read with the native "file_get_contents()" and the result is
     * handed to "UTF8::string_has_bom()".
     *
     * @param string $file_path <p>Path of the file to inspect.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1584
1585
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively, strings are normalized and
     * every other type is handed back untouched.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>Normalization form passed on to the "Normalizer"-class.</p>
     * @param string $leading_combining  <p>Prefix that is put in front of a string which starts with a
     *                                   combining mark.</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        // Containers: filter every entry in place, then we are done.
        if ($type === 'array' || $type === 'object') {
            /** @noinspection ForeachSourceInspection */
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII needs no normalization at all.
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        // Stays true when the input already is normalized or when the
        // normalizer produced a non-empty result.
        $normalization_usable = true;

        if (!\Normalizer::isNormalized($var, $normalization_form)) {
            $normalized = \Normalizer::normalize($var, $normalization_form);
            $normalization_usable = isset($normalized[0]);

            // If the normalizer gave nothing usable, re-encode the input instead.
            $var = $normalization_usable
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        if (
            $var[0] >= "\x80"
            &&
            $normalization_usable
            &&
            isset($leading_combining[0])
            &&
            \preg_match('/^\\p{Mn}/u', $var)
        ) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1651
1652
    /**
     * "filter_input()"-wrapper which normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Fetches one external variable by name, optionally runs it through a filter
     * and passes the outcome to "UTF8::filter()".
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of the variable to fetch.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If the filter
     *                              accepts options, flags can be provided in the "flags" field of the array.
     *                              Only forwarded to the native function when it was passed explicitly.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Forward $options only if the caller really supplied a 4th argument.
        $var = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($var);
    }
1694
1695
    /**
     * "filter_input_array()"-wrapper which normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when
     * needed.
     *
     * Fetches several external variables at once, optionally filters them
     * and passes the outcome to "UTF8::filter()".
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a filter type, or an array
     *                          optionally specifying the filter, flags and options. If the value is an
     *                          array, valid keys are "filter" (the filter type), "flags" (flags that apply
     *                          to the filter) and "options" (options that apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all values
     *                          in the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With only the type given, let the native function use its own defaults.
        $values = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
1740
1741
    /**
     * "filter_var()"-wrapper which normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Runs a variable through the given filter and passes the outcome to "UTF8::filter()".
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param mixed $variable <p>
     *                        Value to filter.
     *                        </p>
     * @param int   $filter   [optional] <p>
     *                        The ID of the filter to apply. The
     *                        manual page lists the available filters.
     *                        </p>
     * @param mixed $options  [optional] <p>
     *                        Associative array of options or bitwise disjunction of flags. If the filter
     *                        accepts options, flags can be provided in the "flags" field of the array. For
     *                        the "callback" filter, a callable should be passed. The
     *                        callback must accept one argument, the value to be filtered, and return
     *                        the value after filtering/sanitizing it.
     *                        Only forwarded to the native function when it was passed explicitly.
     *                        </p>
     *                        <p>
     *                        <code>
     *                        // for filters that accept options, use this format
     *                        $options = array(
     *                        'options' => array(
     *                        'default' => 3, // value to return if the filter fails
     *                        // other options here
     *                        'min_range' => 0
     *                        ),
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                        );
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                        // for filters that only accept flags, you can pass them directly
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                        // for filters that only accept flags, you can also pass them as an array
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                        // callback validate filter
     *                        function foo($value)
     *                        {
     *                        // Expected format: Surname, GivenNames
     *                        if (strpos($value, ", ") === false) return false;
     *                        list($surname, $givennames) = explode(", ", $value, 2);
     *                        $empty = (empty($surname) || empty($givennames));
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
     *                        if ($empty || $notstrings) {
     *                        return false;
     *                        } else {
     *                        return $value;
     *                        }
     *                        }
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                        </code>
     *                        </p>
     *
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
     */
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Forward $options only if the caller really supplied a 3rd argument.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1809
1810
    /**
     * "filter_var_array()"-wrapper which normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when
     * needed.
     *
     * Filters several variables at once and passes the outcome to "UTF8::filter()".
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array $data       <p>
     *                          An array with string keys containing the data to filter.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a
     *                          filter type, or an array optionally specifying the filter, flags and options.
     *                          If the value is an array, valid keys are "filter" (the filter type),
     *                          "flags" (flags that apply to the filter) and "options" (options that
     *                          apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all values
     *                          in the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set
     */
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // With only the data given, let the native function use its own defaults.
        $values = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        return self::filter($values);
    }
1853
1854
    /**
     * Reports whether the "finfo" class exists in the running PHP installation.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfo_class_available = \class_exists('finfo');

        return $finfo_class_available;
    }
1864
1865
    /**
     * Returns the first $n characters of the string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, everything else through UTF8::substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
1886
1887
    /**
     * Checks that the string is not longer than the given number of characters.
     *
     * The length is measured with "UTF8::strlen()".
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @return bool true if string is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $length = self::strlen($str);

        return $length <= $box_size;
    }
1899
1900
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as if it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement table is loaded via "getData('utf8_fix')" on first use and
     * then kept for all following calls.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        // Search / replace lists, built once per request.
        static $search = null;
        static $replace = null;

        if ($str === '') {
            return '';
        }

        if ($search === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $search = \array_keys(self::$BROKEN_UTF8_FIX);
            $replace = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($search, $replace, $str);
    }
1933
1934
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * The string is decoded and re-encoded again and again until a pass
     * no longer changes it. Arrays are processed element by element.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Repeat until one decode/encode round-trip leaves the string unchanged.
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1976
1977
    /**
     * Get the text direction of a specific character.
     *
     * If "IntlChar" support is flagged in the support-array, its answer is used
     * whenever it falls into one of the known LTR / RTL direction classes. Otherwise
     * (or if the class is inconclusive) a built-in table of right-to-left
     * code points decides; everything not listed there counts as 'LTR'.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        // Single right-to-left code points of the fallback table (matched strictly).
        static $RTL_CODE_POINTS = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        // Inclusive right-to-left code point ranges of the fallback table: [first, last].
        static $RTL_RANGES = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $direction_class = \IntlChar::charDirection($char);

            // Direction class values from the "IntlChar"-Class.
            if (\in_array($direction_class, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($direction_class, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // Any other class: fall through to the table lookup below.
        }

        $code_point = static::chr_to_decimal($char);

        // Fast exit: the table only covers 0x5be .. 0x10b7f.
        if (!($code_point >= 0x5be && $code_point <= 0x10b7f)) {
            return 'LTR';
        }

        if (\in_array($code_point, $RTL_CODE_POINTS, true)) {
            return 'RTL';
        }

        foreach ($RTL_RANGES as $range) {
            if ($code_point >= $range[0] && $code_point <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2090
2091
    /**
     * Check for php-support.
     *
     * Without a key the support-"array" is returned as it currently is. With a key,
     * the transliterator list is loaded on demand, stored in the support-"array"
     * under "intl__transliterator_list_ids" and the entry for $key is looked up.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        return self::$SUPPORT;
    }
2115
2116
    /**
     * Guesses the file type from the first two bytes ("magic number") of the given data.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg tiff rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * The two leading bytes are compared byte-for-byte with the known signatures.
     * (Joining the decimal byte values into one number is ambiguous: the bytes
     * 0x07 0xAD and the bytes "GI" both end up as 7173.)
     *
     * @param string $str      <p>The (binary) data to inspect.</p>
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'<br>
     *                         returned as-is if the data is shorter than two bytes
     *                         or if no signature matches
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // Two-byte signature => [extension, mime-type].
        static $SIGNATURES = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xFF\xD8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'II'       => ['tiff', 'image/tiff'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        if ($str === '') {
            return $fallback;
        }

        // Byte-wise on purpose: signatures are raw bytes, not characters.
        $signature = \substr($str, 0, 2);
        if ($signature === false || \strlen($signature) !== 2) {
            return $fallback;
        }

        if (!isset($SIGNATURES[$signature])) {
            return $fallback;
        }

        return [
            'ext'  => $SIGNATURES[$signature][0],
            'mime' => $SIGNATURES[$signature][1],
            'type' => 'binary',
        ];
    }
2224
2225
    /**
     * Builds a random string by picking characters from a given pool.
     *
     * Indexes are drawn with "random_int()"; if that throws, "mt_rand()" is used
     * instead. An empty pool or a non-positive length results in an empty string.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // The fast mbstring path is chosen by the encoding exactly as passed in,
        // i.e. before any normalization of the encoding name.
        $use_mbstring = $encoding === 'UTF-8';

        if ($use_mbstring) {
            $pool_size = (int) \mb_strlen($possibleChars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possibleChars, $encoding);
        }

        if ($pool_size === 0) {
            return '';
        }

        $result = '';
        $generated = 0;

        while ($generated < $length) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_mbstring
                ? \mb_substr($possibleChars, $index, 1)
                : self::substr($possibleChars, $index, 1, $encoding);

            // A failed extraction does not count; simply draw again.
            if ($char !== false) {
                $result .= $char;
                ++$generated;
            }
        }

        return $result;
    }
2286
2287
    /**
2288
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2289
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2290
     *
2291
     * @return string
2292
     */
2293 1
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // random_int() throws when no suitable entropy source is available;
        // fall back to mt_rand() the same way get_random_string() does,
        // instead of letting the exception escape from this helper.
        try {
            $randInt = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $randInt = \mt_rand(0, \mt_getrandmax());
        }

        // mix the random number with request-specific values and the caller's extra entropy
        $uniqueHelper = $randInt .
                        \session_id() .
                        ($_SERVER['REMOTE_ADDR'] ?? '') .
                        ($_SERVER['SERVER_ADDR'] ?? '') .
                        $entropyExtra;

        // uniqid() with "more entropy" enabled, prefixed with the helper string
        $uniqueString = \uniqid($uniqueHelper, true);

        if ($md5) {
            // return a fixed-length (32 hex chars) identifier
            $uniqueString = \md5($uniqueString . $uniqueHelper);
        }

        return $uniqueString;
    }
2309
2310
    /**
2311
     * alias for "UTF8::string_has_bom()"
2312
     *
2313
     * @param string $str
2314
     *
2315
     * @return bool
2316
     *
2317
     * @see UTF8::string_has_bom()
2318
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
2319
     */
2320 2
    public static function hasBom(string $str): bool
    {
        // deprecated alias: hand the work over to the canonical method
        $containsBom = self::string_has_bom($str);

        return $containsBom;
    }
2324
2325
    /**
2326
     * Returns true if the string contains a lower case char, false otherwise.
2327
     *
2328
     * @param string $str <p>The input string.</p>
2329
     *
2330
     * @return bool whether or not the string contains a lower case character
2331
     */
2332 47
    public static function has_lowercase(string $str): bool
    {
        // without mbstring, use the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '.*[[:lower:]]');
        }

        // anything, followed by at least one lower case character
        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('.*[[:lower:]]', $str);
    }
2341
2342
    /**
2343
     * Returns true if the string contains an upper case char, false otherwise.
2344
     *
2345
     * @param string $str <p>The input string.</p>
2346
     *
2347
     * @return bool whether or not the string contains an upper case character
2348
     */
2349 12
    public static function has_uppercase(string $str): bool
    {
        // without mbstring, use the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '.*[[:upper:]]');
        }

        // anything, followed by at least one upper case character
        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('.*[[:upper:]]', $str);
    }
2358
2359
    /**
2360
     * Converts a hexadecimal-value into an UTF-8 character.
2361
     *
2362
     * @param string $hexdec <p>The hexadecimal value.</p>
2363
     *
2364
     * @return false|string one single UTF-8 character
2365
     */
2366 4
    public static function hex_to_chr(string $hexdec)
    {
        // hexadecimal string -> decimal code point -> character
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2370
2371
    /**
2372
     * Converts hexadecimal U+xxxx code point representation to integer.
2373
     *
2374
     * INFO: opposite to UTF8::int_to_hex()
2375
     *
2376
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
2377
     *
2378
     * @return false|int the code point, or false on failure
2379
     */
2380 2
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Accepted forms: "\uXXXX", "U+XXXX" or the bare digits, with 4 to 6 digits.
        // Only real hexadecimal digits are allowed: the former class [a-zA-Z0-9]
        // also matched letters such as "wxyz", which intval(..., 16) silently
        // turned into 0 instead of reporting a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2395
2396
    /**
2397
     * alias for "UTF8::html_entity_decode()"
2398
     *
2399
     * @param string $str
2400
     * @param int    $flags
2401
     * @param string $encoding
2402
     *
2403
     * @return string
2404
     *
2405
     * @see UTF8::html_entity_decode()
2406
     */
2407 4
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // plain alias: all arguments are passed through unchanged
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2411
2412
    /**
2413
     * Converts a UTF-8 string to a series of HTML numbered entities.
2414
     *
2415
     * INFO: opposite to UTF8::html_decode()
2416
     *
2417
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2418
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2419
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2420
     *
2421
     * @return string HTML numbered entities
2422
     */
2423 14
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with "keep ASCII" only code points >= 0x80 are converted to entities
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // A convmap entry is exactly four integers: [start, end, offset, mask].
            // The previous five-element array carried a stray trailing 0, which
            // PHP 7 silently ignored and PHP 8 rejects with a ValueError.
            $convmap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                // no explicit encoding argument on this path (as before)
                return \mb_encode_numericentity($str, $convmap);
            }

            return \mb_encode_numericentity($str, $convmap, $encoding);
        }

        //
        // fallback via vanilla php: encode character by character
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2468
2469
    /**
2470
     * UTF-8 version of html_entity_decode()
2471
     *
2472
     * The reason we are not using html_entity_decode() by itself is because
2473
     * while it is not technically correct to leave out the semicolon
2474
     * at the end of an entity most browsers will still interpret the entity
2475
     * correctly. html_entity_decode() does not convert entities without
2476
     * semicolons, so we are left with our own little solution here. Bummer.
2477
     *
2478
     * Convert all HTML entities to their applicable characters
2479
     *
2480
     * INFO: opposite to UTF8::html_encode()
2481
     *
2482
     * @see http://php.net/manual/en/function.html-entity-decode.php
2483
     *
2484
     * @param string $str      <p>
2485
     *                         The input string.
2486
     *                         </p>
2487
     * @param int    $flags    [optional] <p>
2488
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2489
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2490
     *                         <table>
2491
     *                         Available <i>flags</i> constants
2492
     *                         <tr valign="top">
2493
     *                         <td>Constant Name</td>
2494
     *                         <td>Description</td>
2495
     *                         </tr>
2496
     *                         <tr valign="top">
2497
     *                         <td><b>ENT_COMPAT</b></td>
2498
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2499
     *                         </tr>
2500
     *                         <tr valign="top">
2501
     *                         <td><b>ENT_QUOTES</b></td>
2502
     *                         <td>Will convert both double and single quotes.</td>
2503
     *                         </tr>
2504
     *                         <tr valign="top">
2505
     *                         <td><b>ENT_NOQUOTES</b></td>
2506
     *                         <td>Will leave both double and single quotes unconverted.</td>
2507
     *                         </tr>
2508
     *                         <tr valign="top">
2509
     *                         <td><b>ENT_HTML401</b></td>
2510
     *                         <td>
2511
     *                         Handle code as HTML 4.01.
2512
     *                         </td>
2513
     *                         </tr>
2514
     *                         <tr valign="top">
2515
     *                         <td><b>ENT_XML1</b></td>
2516
     *                         <td>
2517
     *                         Handle code as XML 1.
2518
     *                         </td>
2519
     *                         </tr>
2520
     *                         <tr valign="top">
2521
     *                         <td><b>ENT_XHTML</b></td>
2522
     *                         <td>
2523
     *                         Handle code as XHTML.
2524
     *                         </td>
2525
     *                         </tr>
2526
     *                         <tr valign="top">
2527
     *                         <td><b>ENT_HTML5</b></td>
2528
     *                         <td>
2529
     *                         Handle code as HTML 5.
2530
     *                         </td>
2531
     *                         </tr>
2532
     *                         </table>
2533
     *                         </p>
2534
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2535
     *
2536
     * @return string the decoded string
2537
     */
2538 46
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // fast exit: too short to hold an entity, or no "&" at all
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // A convmap entry is exactly four integers: [start, end, offset, mask].
        // The previous five-element array carried a stray trailing 0, which
        // PHP 7 silently ignored and PHP 8 rejects with a ValueError.
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        // repeat until a pass changes nothing, so nested / double-encoded entities get decoded too
        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    // no explicit encoding argument on this path (as before)
                    $str = \mb_decode_numericentity(
                        $str,
                        $convmap
                    );
                } else {
                    $str = \mb_decode_numericentity(
                        $str,
                        $convmap,
                        $encoding
                    );
                }
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // quotes stay encoded here; html_entity_decode() below handles them according to $flags
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities
                    // (appends the missing ";" to numeric entities written without one)
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2625
2626
    /**
2627
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
2628
     *
2629
     * @param string $str
2630
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2631
     *
2632
     * @return string
2633
     */
2634 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // escape both quote styles and substitute invalid code unit sequences
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2642
2643
    /**
2644
     * Remove empty html-tag.
2645
     *
2646
     * e.g.: <tag></tag>
2647
     *
2648
     * @param string $str
2649
     *
2650
     * @return string
2651
     */
2652 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // an opening tag, optional whitespace only, then a closing tag
        $emptyTagPattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($emptyTagPattern, '', $str);

        return (string) $stripped;
    }
2660
2661
    /**
2662
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2663
     *
2664
     * @see http://php.net/manual/en/function.htmlentities.php
2665
     *
2666
     * @param string $str           <p>
2667
     *                              The input string.
2668
     *                              </p>
2669
     * @param int    $flags         [optional] <p>
2670
     *                              A bitmask of one or more of the following flags, which specify how to handle
2671
     *                              quotes, invalid code unit sequences and the used document type. The default is
2672
     *                              ENT_COMPAT | ENT_HTML401.
2673
     *                              <table>
2674
     *                              Available <i>flags</i> constants
2675
     *                              <tr valign="top">
2676
     *                              <td>Constant Name</td>
2677
     *                              <td>Description</td>
2678
     *                              </tr>
2679
     *                              <tr valign="top">
2680
     *                              <td><b>ENT_COMPAT</b></td>
2681
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2682
     *                              </tr>
2683
     *                              <tr valign="top">
2684
     *                              <td><b>ENT_QUOTES</b></td>
2685
     *                              <td>Will convert both double and single quotes.</td>
2686
     *                              </tr>
2687
     *                              <tr valign="top">
2688
     *                              <td><b>ENT_NOQUOTES</b></td>
2689
     *                              <td>Will leave both double and single quotes unconverted.</td>
2690
     *                              </tr>
2691
     *                              <tr valign="top">
2692
     *                              <td><b>ENT_IGNORE</b></td>
2693
     *                              <td>
2694
     *                              Silently discard invalid code unit sequences instead of returning
2695
     *                              an empty string. Using this flag is discouraged as it
2696
     *                              may have security implications.
2697
     *                              </td>
2698
     *                              </tr>
2699
     *                              <tr valign="top">
2700
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2701
     *                              <td>
2702
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2703
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2704
     *                              string.
2705
     *                              </td>
2706
     *                              </tr>
2707
     *                              <tr valign="top">
2708
     *                              <td><b>ENT_DISALLOWED</b></td>
2709
     *                              <td>
2710
     *                              Replace invalid code points for the given document type with a
2711
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2712
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2713
     *                              instance, to ensure the well-formedness of XML documents with
2714
     *                              embedded external content.
2715
     *                              </td>
2716
     *                              </tr>
2717
     *                              <tr valign="top">
2718
     *                              <td><b>ENT_HTML401</b></td>
2719
     *                              <td>
2720
     *                              Handle code as HTML 4.01.
2721
     *                              </td>
2722
     *                              </tr>
2723
     *                              <tr valign="top">
2724
     *                              <td><b>ENT_XML1</b></td>
2725
     *                              <td>
2726
     *                              Handle code as XML 1.
2727
     *                              </td>
2728
     *                              </tr>
2729
     *                              <tr valign="top">
2730
     *                              <td><b>ENT_XHTML</b></td>
2731
     *                              <td>
2732
     *                              Handle code as XHTML.
2733
     *                              </td>
2734
     *                              </tr>
2735
     *                              <tr valign="top">
2736
     *                              <td><b>ENT_HTML5</b></td>
2737
     *                              <td>
2738
     *                              Handle code as HTML 5.
2739
     *                              </td>
2740
     *                              </tr>
2741
     *                              </table>
2742
     *                              </p>
2743
     * @param string $encoding      [optional] <p>
2744
     *                              Like <b>htmlspecialchars</b>,
2745
     *                              <b>htmlentities</b> takes an optional third argument
2746
     *                              <i>encoding</i> which defines encoding used in
2747
     *                              conversion.
2748
     *                              Although this argument is technically optional, you are highly
2749
     *                              encouraged to specify the correct value for your code.
2750
     *                              </p>
2751
     * @param bool   $double_encode [optional] <p>
2752
     *                              When <i>double_encode</i> is turned off PHP will not
2753
     *                              encode existing html entities. The default is to convert everything.
2754
     *                              </p>
2755
     *
2756
     * @return string
2757
     *                <p>
2758
     *                The encoded string.
2759
     *                <br><br>
2760
     *                If the input <i>string</i> contains an invalid code unit
2761
     *                sequence within the given <i>encoding</i> an empty string
2762
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2763
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2764
     *                </p>
2765
     */
2766 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // the two literal encoding names below are used as-is, everything else is normalized first
        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        // PHP leaves backslashes alone (they usually serve as escape character
        // for database input); they are turned into their numeric entity here.
        //
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // finally convert the remaining non-ASCII characters into numbered entities
        return self::html_encode($encoded, true, $encoding);
    }
2790
2791
    /**
2792
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2793
     *
2794
     * INFO: Take a look at "UTF8::htmlentities()"
2795
     *
2796
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2797
     *
2798
     * @param string $str           <p>
2799
     *                              The string being converted.
2800
     *                              </p>
2801
     * @param int    $flags         [optional] <p>
2802
     *                              A bitmask of one or more of the following flags, which specify how to handle
2803
     *                              quotes, invalid code unit sequences and the used document type. The default is
2804
     *                              ENT_COMPAT | ENT_HTML401.
2805
     *                              <table>
2806
     *                              Available <i>flags</i> constants
2807
     *                              <tr valign="top">
2808
     *                              <td>Constant Name</td>
2809
     *                              <td>Description</td>
2810
     *                              </tr>
2811
     *                              <tr valign="top">
2812
     *                              <td><b>ENT_COMPAT</b></td>
2813
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2814
     *                              </tr>
2815
     *                              <tr valign="top">
2816
     *                              <td><b>ENT_QUOTES</b></td>
2817
     *                              <td>Will convert both double and single quotes.</td>
2818
     *                              </tr>
2819
     *                              <tr valign="top">
2820
     *                              <td><b>ENT_NOQUOTES</b></td>
2821
     *                              <td>Will leave both double and single quotes unconverted.</td>
2822
     *                              </tr>
2823
     *                              <tr valign="top">
2824
     *                              <td><b>ENT_IGNORE</b></td>
2825
     *                              <td>
2826
     *                              Silently discard invalid code unit sequences instead of returning
2827
     *                              an empty string. Using this flag is discouraged as it
2828
     *                              may have security implications.
2829
     *                              </td>
2830
     *                              </tr>
2831
     *                              <tr valign="top">
2832
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2833
     *                              <td>
2834
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2835
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2836
     *                              string.
2837
     *                              </td>
2838
     *                              </tr>
2839
     *                              <tr valign="top">
2840
     *                              <td><b>ENT_DISALLOWED</b></td>
2841
     *                              <td>
2842
     *                              Replace invalid code points for the given document type with a
2843
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2844
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2845
     *                              instance, to ensure the well-formedness of XML documents with
2846
     *                              embedded external content.
2847
     *                              </td>
2848
     *                              </tr>
2849
     *                              <tr valign="top">
2850
     *                              <td><b>ENT_HTML401</b></td>
2851
     *                              <td>
2852
     *                              Handle code as HTML 4.01.
2853
     *                              </td>
2854
     *                              </tr>
2855
     *                              <tr valign="top">
2856
     *                              <td><b>ENT_XML1</b></td>
2857
     *                              <td>
2858
     *                              Handle code as XML 1.
2859
     *                              </td>
2860
     *                              </tr>
2861
     *                              <tr valign="top">
2862
     *                              <td><b>ENT_XHTML</b></td>
2863
     *                              <td>
2864
     *                              Handle code as XHTML.
2865
     *                              </td>
2866
     *                              </tr>
2867
     *                              <tr valign="top">
2868
     *                              <td><b>ENT_HTML5</b></td>
2869
     *                              <td>
2870
     *                              Handle code as HTML 5.
2871
     *                              </td>
2872
     *                              </tr>
2873
     *                              </table>
2874
     *                              </p>
2875
     * @param string $encoding      [optional] <p>
2876
     *                              Defines encoding used in conversion.
2877
     *                              </p>
2878
     *                              <p>
2879
     *                              For the purposes of this function, the encodings
2880
     *                              ISO-8859-1, ISO-8859-15,
2881
     *                              UTF-8, cp866,
2882
     *                              cp1251, cp1252, and
2883
     *                              KOI8-R are effectively equivalent, provided the
2884
     *                              <i>string</i> itself is valid for the encoding, as
2885
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2886
     *                              the same positions in all of these encodings.
2887
     *                              </p>
2888
     * @param bool   $double_encode [optional] <p>
2889
     *                              When <i>double_encode</i> is turned off PHP will not
2890
     *                              encode existing html entities, the default is to convert everything.
2891
     *                              </p>
2892
     *
2893
     * @return string the converted string.
2894
     *                </p>
2895
     *                <p>
2896
     *                If the input <i>string</i> contains an invalid code unit
2897
     *                sequence within the given <i>encoding</i> an empty string
2898
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2899
     *                <b>ENT_SUBSTITUTE</b> flags are set
2900
     */
2901 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // the two literal encoding names below are used as-is, everything else is normalized first
        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2913
2914
    /**
2915
     * Checks whether iconv is available on the server.
2916
     *
2917
     * @return bool
2918
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2919
     */
2920
    public static function iconv_loaded(): bool
    {
        // ask PHP directly whether the "iconv" extension is loaded
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2924
2925
    /**
2926
     * alias for "UTF8::decimal_to_chr()"
2927
     *
2928
     * @param mixed $int
2929
     *
2930
     * @return string
2931
     *
2932
     * @see UTF8::decimal_to_chr()
2933
     */
2934 4
    public static function int_to_chr($int): string
    {
        // plain alias: the value is passed through unchanged
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2938
2939
    /**
     * Formats an integer as a hexadecimal code point string such as "U+0041".
     *
     * The hex digits come from dechex() (lower case) and are left-padded with
     * zeros to a minimum width of four; longer values are kept as they are.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $pfix [optional] <p>Text placed in front of the hex digits.</p>
     *
     * @return string the prefix followed by at least four hex digits
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $digits;
    }
2957
2958
    /**
     * Reports whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> if the class is available, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $hasIntlChar = \class_exists('IntlChar');

        return $hasIntlChar;
    }
2968
2969
    /**
     * Reports whether the "intl" PHP extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $hasIntl = \extension_loaded('intl');

        return $hasIntl;
    }
2979
2980
    /**
     * Deprecated alias of "UTF8::is_ascii()": forwards $str and returns the result.
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_ascii()
     * @deprecated <p>use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $result = self::is_ascii($str);

        return $result;
    }
2994
2995
    /**
     * Deprecated alias of "UTF8::is_base64()": forwards $str only, so the
     * target's remaining parameters keep their default values.
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_base64()
     * @deprecated <p>use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $result = self::is_base64($str);

        return $result;
    }
3009
3010
    /**
     * Deprecated alias of "UTF8::is_binary()": forwards both arguments unchanged.
     *
     * @param mixed $str
     * @param bool  $strict
     *
     * @return bool
     *
     * @see UTF8::is_binary()
     * @deprecated <p>use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $result = self::is_binary($str, $strict);

        return $result;
    }
3025
3026
    /**
     * Deprecated alias of "UTF8::is_bom()": forwards $utf8_chr and returns the result.
     *
     * @param string $utf8_chr
     *
     * @return bool
     *
     * @see UTF8::is_bom()
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $result = self::is_bom($utf8_chr);

        return $result;
    }
3040
3041
    /**
     * Deprecated alias of "UTF8::is_html()": forwards $str and returns the result.
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_html()
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $result = self::is_html($str);

        return $result;
    }
3055
3056
    /**
     * Deprecated alias of "UTF8::is_json()": forwards $str only, so the
     * target's remaining parameters keep their default values.
     *
     * @param string $str
     *
     * @return bool
     *
     * @see UTF8::is_json()
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $result = self::is_json($str);

        return $result;
    }
3070
3071
    /**
     * Deprecated alias of "UTF8::is_utf16()": forwards $str only, so the
     * target's remaining parameters keep their default values.
     *
     * @param mixed $str
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see UTF8::is_utf16()
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
3088
3089
    /**
     * Deprecated alias of "UTF8::is_utf32()": forwards $str only, so the
     * target's remaining parameters keep their default values.
     *
     * @param mixed $str
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see UTF8::is_utf32()
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
3106
3107
    /**
     * Deprecated alias of "UTF8::is_utf8()": forwards both arguments unchanged.
     *
     * @param string $str
     * @param bool   $strict
     *
     * @return bool
     *
     * @see UTF8::is_utf8()
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3122
3123
    /**
     * Tests the string against the POSIX class pattern "^[[:alpha:]]*$".
     *
     * The pattern is evaluated by mb_ereg_match() when self::$SUPPORT['mbstring']
     * is true, and by self::str_matches_pattern() otherwise.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3140
3141
    /**
     * Tests the string against the POSIX class pattern "^[[:alnum:]]*$".
     *
     * The pattern is evaluated by mb_ereg_match() when self::$SUPPORT['mbstring']
     * is true, and by self::str_matches_pattern() otherwise.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3158
3159
    /**
     * Checks that the string consists only of the accepted ASCII bytes.
     *
     * Accepted bytes are 0x20-0x7E plus 0x09, 0x0A, 0x0D, 0x10 and 0x13;
     * any other byte makes the check fail. An empty string passes.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if every byte is in the accepted set<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        // NOTE(review): \x10 and \x13 may have been meant as decimal 10 / 13
        // (already present as \x0A / \x0D) - confirm before changing the set.
        $rejectedBytes = '/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/';

        return $str === '' || !\preg_match($rejectedBytes, $str);
    }
3176
3177
    /**
     * Returns true if the value is a string that survives a strict
     * base64 decode followed by a re-encode without any change.
     *
     * Non-string input is always rejected. The empty string is rejected
     * unless $emptyStringIsValid is anything other than false.
     *
     * @param mixed|string $str                <p>The input string.</p>
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round-trip check: only canonical base64 text re-encodes to itself
        return \base64_encode($decoded) === $str;
    }
3202
3203
    /**
     * Heuristically decides whether the input is binary data.
     *
     * The input is cast to string and then checked in this order; the first
     * hit returns true:
     *  1. it consists solely of the characters "0" and "1",
     *  2. self::get_file_type() reports the type "binary",
     *  3. more than 25% of its bytes are NUL bytes,
     *  4. only with $strict: finfo reports the MIME encoding "binary".
     * An empty string is never binary.
     *
     * @param mixed $input
     * @param bool  $strict <p>Also consult ext-fileinfo.</p>
     *
     * @throws \RuntimeException if $strict is true, no earlier check matched
     *                           and self::$SUPPORT['finfo'] is false
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $bytes = (string) $input;

        if ($bytes === '') {
            return false;
        }

        // textual bit-string such as "010110"
        if (\preg_match('~^[01]+$~', $bytes)) {
            return true;
        }

        $fileType = self::get_file_type($bytes);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // NUL-byte ratio over the whole input
        $byteCount = \strlen($bytes);
        $nullCount = \substr_count($bytes, "\x0", 0, $byteCount);
        if ($nullCount / $byteCount > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $detectedEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($bytes);

        return $detectedEncoding === 'binary';
    }
3247
3248
    /**
     * Check if the file is binary.
     *
     * Reads up to the first 512 bytes of the file and passes them to
     * self::is_binary() in strict mode. Returns false when the file cannot
     * be opened or when nothing was read.
     *
     * @param string $file <p>Path handed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (\is_resource($handle) === false) {
            return false;
        }

        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
3272
3273
    /**
     * Tests the string against the POSIX class pattern "^[[:space:]]*$".
     *
     * The pattern is evaluated by mb_ereg_match() when self::$SUPPORT['mbstring']
     * is true, and by self::str_matches_pattern() otherwise.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3290
3291
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The value is compared strictly (===) against every key of self::$BOM,
     * so the whole string has to be a BOM; a string that merely starts with
     * one does not match.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // Only the keys are needed. Looking them up by value avoids taking
        // references into the shared static array, which the previous
        // by-reference foreach did without ever using or unsetting them.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3312
3313
    /**
     * Determine whether the value is considered to be empty.
     *
     * A value is empty when it converts to boolean FALSE, e.g. '', '0', 0,
     * null, false or an empty array.
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // for an existing variable empty($x) is the same as a false boolean cast
        return (bool) $str === false;
    }
3327
3328
    /**
     * Tests the string against the POSIX class pattern "^[[:xdigit:]]*$".
     *
     * The pattern is evaluated by mb_ereg_match() when self::$SUPPORT['mbstring']
     * is true, and by self::str_matches_pattern() otherwise.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3345
3346
    /**
     * Check if the string contains at least one html-tag.
     *
     * The input is first passed through self::emoji_encode() and the result
     * is searched for one opening, closing or self-closing tag, with or
     * without attributes. An empty string never counts as html.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        $tagMatch = [];
        \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded, $tagMatch);

        return $tagMatch !== [];
    }
3368
3369
    /**
     * Try to check if "$str" is an json-string.
     *
     * The string is decoded with self::json_decode(). A null result is only
     * accepted when the input is the word "null" in any letter case. With
     * $onlyArrayOrObjectResultsAreValid strictly true, scalar results are
     * rejected. Finally json_last_error() must report no error.
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if self::$SUPPORT['json'] is false
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a null result is ambiguous: it is either the literal "null" or a failed decode
        $isLiteralNull = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$isLiteralNull) {
            return false;
        }

        $isContainer = \is_object($decoded) || \is_array($decoded);
        if ($onlyArrayOrObjectResultsAreValid === true && !$isContainer) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3405
3406
    /**
     * Tests the string against the POSIX class pattern "^[[:lower:]]*$".
     *
     * The pattern is evaluated by mb_ereg_match() when self::$SUPPORT['mbstring']
     * is true, and by self::str_matches_pattern() otherwise.
     *
     * @param string $str
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3420
3421
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check is a trial unserialize(). Because unserialize() returns false
     * both on failure and for a serialized boolean false, the literal 'b:0;'
     * is accepted up front. Class instantiation is disabled during the probe,
     * so serialized objects are still recognised, but none of their classes
     * are loaded and no magic methods run.
     *
     * @param string $str
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        if ($str === 'b:0;') {
            return true;
        }

        // The "@" is deliberate: unserialize() raises a notice for input that
        // is not serialized, which is the expected negative case here.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe = @\unserialize($str, ['allowed_classes' => false]);

        return $probe !== false;
    }
3440
3441
    /**
     * Tests the string against the POSIX class pattern "^[[:upper:]]*$".
     *
     * The pattern is evaluated by mb_ereg_match() when self::$SUPPORT['mbstring']
     * is true, and by self::str_matches_pattern() otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3459
3460
    /**
     * Check if the string is UTF-16.
     *
     * After the optional binary pre-check and BOM removal, the input is
     * probed once as UTF-16LE and once as UTF-16BE. Each probe converts the
     * input to UTF-8, round-trips that result through the probed encoding and,
     * if the round trip is stable, counts a score. The byte order with the
     * higher score wins; equal scores (including 0 / 0) mean "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>If strictly true, input that
     *                                     self::is_binary() (strict mode) does
     *                                     not classify as binary is rejected.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        $str = (string) $str;

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // count_chars() result for the raw input; filled on first use and shared by both probes
        $strChars = [];

        // Scores one byte order. Replaces two copy-pasted branches that each
        // iterated a function result by reference without using the value.
        $scoreFor = static function (string $encoding) use ($str, &$strChars): int {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                return 0;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                return 0;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            $score = 0;
            foreach (self::count_chars($test3) as $test3char => $unusedValue) {
                // NOTE(review): this searches the *values* of $strChars for a
                // *key* of the converted string - kept as is, confirm the intent.
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$score;
                }
            }

            return $score;
        };

        $maybeUTF16LE = $scoreFor('UTF-16LE');
        $maybeUTF16BE = $scoreFor('UTF-16BE');

        if ($maybeUTF16BE === $maybeUTF16LE) {
            return false;
        }

        return $maybeUTF16LE > $maybeUTF16BE ? 1 : 2;
    }
3537
3538
    /**
     * Check if the string is UTF-32.
     *
     * After the optional binary pre-check and BOM removal, the input is
     * probed once as UTF-32LE and once as UTF-32BE. Each probe converts the
     * input to UTF-8, round-trips that result through the probed encoding and,
     * if the round trip is stable, counts a score. The byte order with the
     * higher score wins; equal scores (including 0 / 0) mean "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>If strictly true, input that
     *                                     self::is_binary() (strict mode) does
     *                                     not classify as binary is rejected.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        $str = (string) $str;

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // count_chars() result for the raw input; filled on first use and shared by both probes
        $strChars = [];

        // Scores one byte order. Replaces two copy-pasted branches that each
        // iterated a function result by reference without using the value.
        $scoreFor = static function (string $encoding) use ($str, &$strChars): int {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                return 0;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                return 0;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            $score = 0;
            foreach (self::count_chars($test3) as $test3char => $unusedValue) {
                // NOTE(review): this searches the *values* of $strChars for a
                // *key* of the converted string - kept as is, confirm the intent.
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$score;
                }
            }

            return $score;
        };

        $maybeUTF32LE = $scoreFor('UTF-32LE');
        $maybeUTF32BE = $scoreFor('UTF-32BE');

        if ($maybeUTF32BE === $maybeUTF32LE) {
            return false;
        }

        return $maybeUTF32LE > $maybeUTF32BE ? 1 : 2;
    }
3615
3616
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * When an array is passed, every element is checked recursively and the result is
     * true only if all elements are valid. The empty string is always valid.
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string (or array of strings) to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     *              <strong>true</strong> if the input is valid UTF-8, <strong>false</strong> otherwise
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // Nothing is modified here, so iterate by value (no dangling reference afterwards).
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        $str = (string) $str;

        // The /u regex can only be trusted when PCRE was built with UTF-8 support;
        // the previous condition was inverted and used it exactly when support was missing.
        if (self::pcre_utf8_support() === true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        // Fallback: byte-wise state machine.
        $mState = 0; // expected number of continuation octets still missing for the current character
        $mUcs4 = 0; // code point assembled so far
        $mBytes = 1; // total number of octets announced by the current lead octet

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence: this is a legal continuation octet.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;

                // End of the multi-octet sequence: mUcs4 now contains the final
                // Unicode code point.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    if (
                        // From Unicode 3.1, non-shortest form is illegal
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // reset the decoder state for the next character
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // A non-zero state here means the input ended in the middle of a
        // multi-octet sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
        return $mState === 0;
    }
3771
3772
    /**
     * Decodes a JSON string (wrapper around PHP's native "json_decode()").
     *
     * The input is passed through "UTF8::filter()" before it is handed to the
     * native decoder.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, forwarded unchanged
     *                        to the native function.
     *                        </p>
     *
     * @throws \RuntimeException if the json support flag of this class is false
     *
     * @return mixed
     *               whatever the native "json_decode()" returns for the filtered input
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filteredJson = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filteredJson, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3822
3823
    /**
     * Returns the JSON representation of a value (wrapper around PHP's native "json_encode()").
     *
     * The value is passed through "UTF8::filter()" before it is handed to the
     * native encoder.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded.</p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>), forwarded unchanged to the
     *                       native function.
     *                       </p>
     * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
     *
     * @throws \RuntimeException if the json support flag of this class is false
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filteredValue, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3872
3873
    /**
     * Reports whether JSON support is available, detected via the existence of "json_decode()".
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $decoderAvailable = \function_exists('json_decode');

        return $decoderAvailable;
    }
3883
3884
    /**
     * Lowercases the first character of a string and leaves the rest untouched.
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // Any other encoding: normalize its name and go through the class helpers.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );

            return $head . $tail;
        }

        // UTF-8: split with the native mb_* functions.
        $tail = (string) \mb_substr($str, 1);
        $firstChar = (string) \mb_substr($str, 0, 1);

        // Without language / length special cases the plain mb_strtolower() is used.
        if ($lang === null && $tryToKeepStringLength === false) {
            return \mb_strtolower($firstChar) . $tail;
        }

        $head = self::strtolower(
            $firstChar,
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        );

        return $head . $tail;
    }
3940
3941
    /**
     * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $result;
    }
3963
3964
    /**
     * Lowercases the first character of every word in the string.
     *
     * The string is split with "UTF8::str_to_words()", each non-excluded word is
     * passed through "UTF8::lcfirst()" and the pieces are joined back together.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Words that must be left untouched.</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not
     *                                           start a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against '' explicitly: a truthiness check would also swallow the input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // Falsy pieces ('' or '0') have nothing to lowercase and are kept as they are.
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }
        // Break the reference so later writes to $word cannot touch the last array element.
        unset($word);

        return \implode('', $words);
    }
4010
4011
    /**
     * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $result;
    }
4033
4034
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; when null or empty,
     *                           leading whitespace is stripped instead.</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit null / '' checks: a truthiness test would treat the char list "0"
        // as "not given" and strip whitespace instead of zeros.
        if ($chars !== null && $chars !== '') {
            $pattern = '^[' . \preg_quote($chars, '/') . ']+';
        } else {
            $pattern = '^[\\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4062
4063
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string before it is inspected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, null when no code points were found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\max($codepoints));
    }
4085
4086
    /**
     * Returns the largest entry of "UTF8::chr_size_list()" for the given string,
     * i.e. the maximum number of bytes taken by any of its characters.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte length of the given chars, 0 when the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return \count($sizes) > 0 ? (int) \max($sizes) : 0;
    }
4103
4104
    /**
     * Reports whether the "mbstring" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4114
4115
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string before it is inspected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, null when no code points were found
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\min($codepoints));
    }
4137
4138
    /**
     * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4153
4154
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: empty / "0" / "1" input yields the fallback; a few very common
     * names are answered directly; then the per-request cache and the list of known
     * encodings are consulted; finally the upper-cased name (stripped of everything
     * but letters and digits) is looked up in an alias table. Names that match
     * nothing are returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $RESOLVED_NAMES = [];

        $name = (string) $encoding;

        // '' and '0' are "no encoding given"; '1' and '0' are only a fallback
        // for non "strict_types" usage (e.g. booleans cast to string).
        if ($name === '' || $name === '0' || $name === '1') {
            return $fallback;
        }

        if (\in_array($name, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($name, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($name, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (isset($RESOLVED_NAMES[$name])) {
            return $RESOLVED_NAMES[$name];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // Already one of the known encoding names: keep it exactly as given.
        if (\in_array($name, self::$ENCODINGS, true)) {
            $RESOLVED_NAMES[$name] = $name;

            return $name;
        }

        $upperName = \strtoupper($name);
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $upperName);

        $aliases = [
            // ISO-8859-1: Western European
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1',
            // ISO-8859-2: Central European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2',
            // ISO-8859-3: Southern European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3',
            // ISO-8859-4: Northern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4',
            // ISO-8859-5: Cyrillic
            'ISO88595'    => 'ISO-8859-5',
            // ISO-8859-6: Arabic
            'ISO88596'    => 'ISO-8859-6',
            // ISO-8859-7: Greek
            'ISO88597'    => 'ISO-8859-7',
            // ISO-8859-8: Hebrew
            'ISO88598'    => 'ISO-8859-8',
            // ISO-8859-9: Turkish
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9',
            // ISO-8859-11: Thai
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11',
            // ISO-8859-10: Nordic
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10',
            // ISO-8859-13: Baltic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13',
            // ISO-8859-14: Celtic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14',
            // ISO-8859-15: Western European (with some extra chars e.g. €)
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15',
            // ISO-8859-16: Southeast European
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16',
            // Windows code pages
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            // Unicode transformation formats
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            // raw bytes
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Unknown names fall through as the upper-cased input.
        $resolved = $aliases[$aliasKey] ?? $upperName;

        // The cache is keyed by the name exactly as the caller spelled it.
        $RESOLVED_NAMES[$name] = $resolved;

        return $resolved;
    }
4297
4298
    /**
     * Standardize line endings to unix-like: "\r\n" and lone "\r" both become "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // strtr() prefers the longest key, so "\r\n" collapses into a single "\n".
        return \strtr($str, ["\r\n" => "\n", "\r" => "\n"]);
    }
4309
4310
    /**
     * Normalize some MS Word special characters: typographic quotes, dashes and
     * the ellipsis are replaced by their plain ASCII counterparts.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        return \strtr($str, $replacements);
    }
4361
4362
    /**
     * Normalize the whitespace: every entry of the class' whitespace table is replaced
     * by a plain space and, unless requested otherwise, the entries of the
     * bidirectional control table are removed.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        // Search lists are built once per request and reused on later calls.
        static $WHITESPACE_LISTS = [];
        static $BIDI_CONTROL_LIST = null;

        if ($str === '') {
            return '';
        }

        // One cached list per variant: 0 = replace NBSP as well, 1 = keep NBSP.
        $variant = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_LISTS[$variant])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $WHITESPACE_LISTS[$variant] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_CONTROL_LIST === null) {
                $BIDI_CONTROL_LIST = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            $str = \str_replace($BIDI_CONTROL_LIST, '', $str);
        }

        return \str_replace($WHITESPACE_LISTS[$variant], ' ', $str);
    }
4403
4404
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: per-call-site cache, the self::$ORD table, "IntlChar::ord()"
     * (when available) and finally a manual decode of the first (up to) four bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 if no byte could be read from the character
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        // results are memoized per "<character><encoding>" key
        static $CHAR_CACHE = [];

        $char = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cacheKey = $char . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // after normalization the encoding may still differ from UTF-8
        if ($encoding !== 'UTF-8') {
            $char = self::encode($encoding, $char);
        }

        // lazy-load the lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$char])) {
            return $CHAR_CACHE[$cacheKey] = self::$ORD[$char];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($char);
            if ($intlCode) {
                return $CHAR_CACHE[$cacheKey] = $intlCode;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (at most) four byte values
        $bytes = \unpack('C*', (string) \substr($char, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $codePoint = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $codePoint = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $codePoint = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing usable): the lead byte is the result
            $codePoint = $lead;
        }

        return $CHAR_CACHE[$cacheKey] = $codePoint;
    }
4489
4490
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()" this method never places variables in the current scope;
     *          the result is always written into the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: use the native parser and judge success by the filled result only
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($input, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($input, $result);

        return $parsed !== false && $result !== [];
    }
4522
4523
    /**
     * Checks if the "u" pattern modifier is available, which enables Unicode support in PCRE.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // probe with an empty pattern + "u" modifier; any warning is silenced on purpose
        // and a falsy result is reported as "not supported"
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe = @\preg_match('//u', '');

        return (bool) $probe;
    }
4535
4536
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is resolved to a code point in this order: decimal digits,
     * hexadecimal digits (via self::hex_to_int()), otherwise the code point of
     * the given character (via self::ord()).
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if "ext-ctype" is not available
     *
     * @return string[]
     *                  An empty array if a boundary is empty or resolves to 0
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // resolve start first, then end; bail out as soon as one of them is unusable
        $bounds = [];
        foreach ([$var1, $var2] as $boundary) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit((string) $boundary)) {
                $codePoint = (int) $boundary;
            } elseif (\ctype_xdigit($boundary)) {
                $codePoint = (int) self::hex_to_int($boundary);
            } else {
                $codePoint = self::ord($boundary);
            }

            if (!$codePoint) {
                return [];
            }

            $bounds[] = $codePoint;
        }

        $chars = [];
        foreach (\range($bounds[0], $bounds[1]) as $i) {
            $chars[] = (string) self::chr($i);
        }

        return $chars;
    }
4587
4588
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // fast path: without any of these markers there is nothing to decode
        $needsDecoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needsDecoding = true;

                break;
            }
        }

        if ($needsDecoding === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // decode once, or repeat until a pass no longer changes the string
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entitiesDecoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );
            $str = self::fix_simple_utf8(\rawurldecode($entitiesDecoded));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
4645
4646
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always compiled with the "u" modifier.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the option string "msr" is mapped onto "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty (falsy) delimiter
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4679
4680
    /**
     * Alias of "UTF8::remove_bom()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the value returned by "UTF8::remove_bom()"
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4694
4695
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of self::$BOM (BOM bytes => byte length) is tested in turn
     * against the start of the (possibly already shortened) string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = \strlen($str);
        foreach (self::$BOM as $bomString => $bomByteLength) {
            if (\strpos($str, $bomString, 0) !== 0) {
                // string does not start with this BOM
                continue;
            }

            $stripped = \substr($str, $bomByteLength, $remainingBytes);
            if ($stripped === false) {
                return '';
            }

            $remainingBytes -= (int) $bomByteLength;
            $str = (string) $stripped;
        }

        return $str;
    }
4724
4725
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of one or more consecutive repetitions of an item is collapsed
     * into a single occurrence of that item.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // anything that is neither a string nor an array is ignored
        if (\is_array($what) === false) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($what as $item) {
            // Replace with the captured group instead of the raw item: the raw item
            // would be parsed as a replacement template, so items such as '$0' or '\0'
            // would be treated as back-references and the run would not be collapsed.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
4748
4749
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, passed through to
     *                              "strip_tags()". Default: '' (empty string)</p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $stripped = \strip_tags($str, $allowableTags);

        return $stripped;
    }
4763
4764
    /**
     * Remove all breaks [<br> | \r\n | \r | \n] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced by a single $replacement.
     * Only real "<br>" tags (e.g. "<br>", "<br/>", "<br class='x' />", any letter case)
     * are matched; tags that merely start with "br" (e.g. "<brand>") are kept.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // - no leading "/" before "\r\n": the old pattern swallowed a slash that preceded a CRLF
        // - "\r\n" is listed first so the pair is consumed as one break
        // - "\b" after "br" keeps unrelated tags, "[^>]*" stops at the first ">"
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
4776
4777
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>Also remove the url encoded form ("%00" ... "%1F") of the
     *                            control characters. Default: true</p>
     * @param string $replacement [optional] <p>Replacement for every removed sequence. Default: ''</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        $patterns = $url_encoded
            ? [
                '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
                '/%1[0-9a-fA-F]/', // url encoded 16-31
            ]
            : [];
        $patterns[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        // repeat until a pass replaces nothing: a removal can expose a new match (e.g. "%%0000")
        while (true) {
            $str = (string) \preg_replace($patterns, $replacement, $str, -1, $count);
            if ($count === 0) {
                break;
            }
        }

        return $str;
    }
4810
4811
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix test is a byte-wise comparison; the cut itself is done in
     * characters of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // compare against '' explicitly: a plain truthiness check would ignore the prefix "0"
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4842
4843
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix test is a byte-wise comparison; the cut itself is done in
     * characters of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // compare against '' explicitly: a plain truthiness check would ignore the suffix "0"
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4875
4876
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        // case-sensitive: native "str_replace()"; otherwise delegate to "UTF8::str_ireplace()"
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4898
4899
    /**
     * Replaces all occurrences of every $search element in $str by $replacement.
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        // case-sensitive: native "str_replace()"; otherwise delegate to "UTF8::str_ireplace()"
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4921
4922
    /**
     * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character. Default: '' (remove)</p>
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // "mb_substitute_character()" only accepts a code point or one of the keywords
            // "none" / "long" / "entity", so an arbitrary replacement string can not be handed
            // over directly. For a non-empty replacement, invalid bytes are first turned into
            // U+FFFD, which the final "str_replace()" below swaps for the replacement.
            $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

            $save = \mb_substitute_character();

            try {
                \mb_substitute_character($substitute);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the previous (global) mbstring setting
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 encoding of U+FFFD
        return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
    }
4968
4969
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped. If null or empty,
     *                           whitespace is stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // compare explicitly: a plain truthiness check would treat the char list "0" as "not given"
        if ($chars !== null && $chars !== '') {
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4997
4998
    /**
     * WARNING: Prints (echo) every entry of self::$SUPPORT, e.g. for debugging.
     *
     * Output format: "<pre>", then one "key - value\n<br>" line per entry, then "</pre>".
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        echo '<pre>';

        foreach (\array_keys(self::$SUPPORT) as $key) {
            echo $key . ' - ' . \print_r(self::$SUPPORT[$key], true) . "\n<br>";
        }

        echo '</pre>';
    }
5012
5013
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity, the unchanged char (kept ASCII) or '' for empty input
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // the ASCII test only runs when the caller asked to keep ASCII chars
        $keepUnchanged = $keepAsciiChars === true && self::is_ascii($char) === true;
        if ($keepUnchanged) {
            return $char;
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
5038
5039
    /**
     * Replace every run of $tabLength spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace($spaces, "\t", $str);
    }
5057
5058
    /**
     * Alias of "UTF8::str_split()".
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[] the value returned by "UTF8::str_split()"
     *
     * @see UTF8::str_split()
     */
    public static function split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false
    ): array {
        $parts = self::str_split($str, $length, $cleanUtf8);

        return $parts;
    }
5076
5077
    /**
     * Alias of "UTF8::str_starts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool the value returned by "UTF8::str_starts_with()"
     *
     * @see UTF8::str_starts_with()
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $startsWith = self::str_starts_with($haystack, $needle);

        return $startsWith;
    }
5091
5092
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        );

        // drop leading dashes / underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // without language / length special cases the plain "mb_strtoupper()" is used
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        /**
         * Upper-case helper shared by both callbacks below.
         *
         * @param string $text
         * @param bool   $clean <p>Forwarded as "$cleanUtf8" to "UTF8::strtoupper()".</p>
         *
         * @return string
         */
        $toUpper = static function (string $text, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
            if ($useMbFunction !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        // remove separators (dash, underscore, whitespace) and upper-case the char that follows them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                if (!isset($match[1])) {
                    // separators at the very end: nothing follows, just drop them
                    return '';
                }

                return $toUpper($match[1], false);
            },
            $str
        );

        // upper-case each run of digits together with the char that follows it
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $str
        );
    }
5177
5178
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * Whitespace is collapsed first; the capitalization helper then runs once
     * with ' ' and once with '-' as the word delimiter.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $bySpace = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($bySpace, '-');
    }
5196
5197
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5219
5220
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty haystack, an empty needle list or an empty needle yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains every one of the $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // empty needles are rejected, but the string "0" is a valid needle
            if (!$needle && $needle !== '0') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // one missing needle is enough to fail; keep checking the rest otherwise
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5255
5256
    /**
     * Returns true if the string contains at least one of the $needles, false
     * otherwise. By default the comparison is case-sensitive, but can be made
     * insensitive by setting $caseSensitive to false.
     *
     * Falsy needles (e.g. '' or '0') are skipped.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains any of the $needles
     */
    public static function str_contains_any(string $haystack, array $needles, bool $caseSensitive = true): bool
    {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $candidate) {
            if ($candidate) {
                $position = $caseSensitive
                    ? \strpos($haystack, $candidate)
                    : \mb_stripos($haystack, $candidate);

                if ($position !== false) {
                    return true;
                }
            }
        }

        return false;
    }
5298
5299
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is "UTF8::str_delimit()" with '-' as the delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *
     * @see UTF8::str_delimit()
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5313
5314
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase, because
     * the delimiter is only inserted after the lowercase step.
     *
     * The regex work is done via "mb_ereg_replace()" when mbstring support is
     * flagged, otherwise via "preg_replace()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $hasMbString = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // 1.) mark every uppercase letter that is not at a word boundary with a leading dash
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $marked = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $marked = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // 2.) lowercase: plain "mb_strtolower()" only when no special handling was requested
        if ($lang === null && $tryToKeepStringLength === false && $encoding === 'UTF-8') {
            $lowered = \mb_strtolower($marked);
        } else {
            $lowered = self::strtolower($marked, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // 3.) collapse every run of dashes, underscores and whitespace into the delimiter
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $lowered);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $lowered);
    }
5365
5366
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary data (UTF-16 / UTF-32), ASCII, UTF-8,
     * "mb_detect_encoding()" and finally an "iconv()" round-trip over the known
     * encodings list.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        if (self::is_binary($input, true) === true) {
            $utf16Type = self::is_utf16($input, false);
            if ($utf16Type === 1) {
                return 'UTF-16LE';
            }
            if ($utf16Type === 2) {
                return 'UTF-16BE';
            }

            $utf32Type = self::is_utf32($input, false);
            if ($utf32Type === 1) {
                return 'UTF-32LE';
            }
            if ($utf32Type === 2) {
                return 'UTF-32BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        // 2.) plain ASCII
        if (self::is_ascii($input) === true) {
            return 'ASCII';
        }

        // 3.) UTF-8
        if (self::is_utf8($input) === true) {
            return 'UTF-8';
        }

        // 4.) "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"
        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, \mb_detect_order(), true);
            if ($detected) {
                return $detected;
            }
        }

        // 5.) "iconv()": the first encoding that round-trips the input unchanged wins
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($roundTrip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5452
5453
    /**
     * Alias for "UTF8::str_ends_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $endsWithNeedle = self::str_ends_with($haystack, $needle);

        return $endsWithNeedle;
    }
5467
5468
    /**
     * Check if the string ends with the given substring (case-sensitive).
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        $needleLength = \strlen($needle);

        return $haystack !== '' && \substr($haystack, -$needleLength) === $needle;
    }
5488
5489
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list never enters the loop and therefore yields false
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5513
5514
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // byte-wise prefix comparison, only meaningful for a non-empty $substring
        $alreadyPrefixed = $substring !== ''
                           && \strncmp($str, $substring, \strlen($substring)) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5535
5536
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $alreadySuffixed = $str !== ''
                           && $substring !== ''
                           && \substr($str, -\strlen($substring)) === $substring;

        if ($alreadySuffixed) {
            return $str;
        }

        return $str . $substring;
    }
5558
5559
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * Every occurrence of '_id' is removed before the remaining underscores
     * are turned into spaces; the result is trimmed before "UTF8::ucfirst()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        $withoutId = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $withoutId);

        return self::ucfirst(\trim($spaced));
    }
5583
5584
    /**
     * Alias for "UTF8::str_istarts_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *
     * @see UTF8::str_istarts_with()
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $startsWithNeedle = self::str_istarts_with($haystack, $needle);

        return $startsWithNeedle;
    }
5598
5599
    /**
     * Alias for "UTF8::str_iends_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *
     * @see UTF8::str_iends_with()
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $endsWithNeedle = self::str_iends_with($haystack, $needle);

        return $endsWithNeedle;
    }
5613
5614
    /**
     * Check if the string ends with the given substring, case insensitive.
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // the tail is cut by byte length of the needle, then compared via "UTF8::strcasecmp()"
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5634
5635
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     *
     * @see UTF8::str_iends_with()
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // an empty list never enters the loop and therefore yields false
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5659
5660
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin wrapper around "UTF8::stripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::stripos()
     */
    public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5686
5687
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin wrapper around "UTF8::strripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strripos()
     */
    public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5714
5715
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin wrapper around "UTF8::strpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strpos()
     */
    public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5741
5742
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin wrapper around "UTF8::strrpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strrpos()
     */
    public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5769
5770
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * If $index is greater than the length of the string, the string is
     * returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
    {
        // fast path: plain "mb_*" functions for UTF-8
        if ($encoding === 'UTF-8') {
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5809
5810
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search term is turned into a quoted regex with the "u" and "i"
     * modifiers; an empty search term becomes a pattern that never matches.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value(s), passed on to "preg_replace()".
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = \array_map(
            static function ($term): string {
                $term = (string) $term;

                // an empty term must not match anything
                return $term === ''
                    ? '/^(?<=.)$/'
                    : '/' . \preg_quote($term, '/') . '/ui';
            },
            (array) $search
        );

        $result = \preg_replace($patterns, $replace, $subject, -1, $replacements);
        $count = $replacements;

        return $result;
    }
5854
5855
    /**
     * Replaces $search from the beginning of string with $replacement
     * (case-insensitive, multi-byte safe).
     *
     * Special cases:
     * - empty $str and empty $replacement: '' is returned
     * - empty $str and empty $search: $replacement is returned
     * - empty $search (non-empty $str): $replacement is appended to $str
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        // Use the multi-byte functions: the byte-wise "stripos()" does not fold
        // the case of non-ASCII letters (e.g. "Ö" vs. "ö").
        if (\mb_stripos($str, $search, 0, 'UTF-8') === 0) {
            $searchLength = (int) \mb_strlen($search, 'UTF-8');

            return $replacement . (string) \mb_substr($str, $searchLength, null, 'UTF-8');
        }

        return $str;
    }
5886
5887
    /**
     * Replaces $search from the ending of string with $replacement
     * (case-insensitive, multi-byte safe).
     *
     * Special cases:
     * - empty $str and empty $replacement: '' is returned
     * - empty $str and empty $search: $replacement is returned
     * - empty $search (non-empty $str): $replacement is appended to $str
     * - $search longer than $str: $str is returned unchanged
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        $strLength = (int) \mb_strlen($str, 'UTF-8');
        $searchLength = (int) \mb_strlen($search, 'UTF-8');

        // A longer $search can never match; this guard also avoids passing an
        // out-of-range negative offset to the search function.
        if ($searchLength > $strLength) {
            return $str;
        }

        // Compare only the tail that is exactly as long as $search; the
        // multi-byte functions also fold the case of non-ASCII letters.
        $headLength = $strLength - $searchLength;
        $tail = (string) \mb_substr($str, $headLength, null, 'UTF-8');
        if (\mb_stripos($tail, $search, 0, 'UTF-8') === 0) {
            return (string) \mb_substr($str, 0, $headLength, 'UTF-8') . $replacement;
        }

        return $str;
    }
5918
5919
    /**
     * Check if the string starts with the given substring, case insensitive.
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
5939
5940
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * An empty $str or an empty $substrings list yields false.
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     *
     * @see UTF8::str_istarts_with()
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5968
5969
    /**
     * Gets the substring after the first occurrence of a separator
     * (case-insensitive search).
     *
     * Returns '' if $str or $separator is empty, or if the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Look the offset up in the same encoding that is used to cut the string
        // below, otherwise the character index does not fit for non UTF-8 input.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6003
6004
    /**
     * Gets the substring after the last occurrence of a separator
     * (case-insensitive search).
     *
     * Returns '' if $str or $separator is empty, or if the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Look the offset up in the same encoding that is used to cut the string
        // below, otherwise the character index does not fit for non UTF-8 input.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6038
6039
    /**
     * Gets the substring before the first occurrence of a separator
     * (case-insensitive search).
     *
     * Returns '' if $str or $separator is empty, or if the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Look the offset up in the same encoding that is used to cut the string
        // below, otherwise the character index does not fit for non UTF-8 input.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6065
6066
    /**
     * Gets the substring before the last occurrence of a separator
     * (case-insensitive search).
     *
     * Returns '' if $str or $separator is empty, or if the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // fast path: plain "mb_*" functions for UTF-8
        if ($encoding === 'UTF-8') {
            $position = \mb_strripos($str, $separator);

            return $position === false ? '' : (string) \mb_substr($str, 0, $position);
        }

        $position = self::strripos($str, $separator, 0, $encoding);

        return $position === false ? '' : (string) self::substr($str, 0, $position, $encoding);
    }
6097
6098
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * Delegates to "UTF8::stristr()"; '' is returned if $str or $needle is
     * empty, or if "UTF8::stristr()" reports false.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6134
6135
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * The lookup is delegated to self::strrichr().
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string <p>Empty string if an argument is empty or the lookup yields false.</p>
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6166
6167
    /**
     * Returns the last $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>Empty string if the input is empty or $n is not positive.</p>
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $normalized = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized);
        }

        // a negative start offset counts from the end of the string
        return (string) \mb_substr($str, -$n);
    }
6190
6191
    /**
     * Limit the number of characters in a string.
     *
     * If the string is longer than "$length", it is cut so that the kept part plus
     * "$strAddOn" is "$length" characters long. If "$strAddOn" alone is already
     * "$length" characters or longer, nothing of the input is kept and only
     * "$strAddOn" is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>Empty string if the input is empty or $length is not positive.</p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Clamp at 0: a negative length would make the substring call cut from the
            // end of the string instead, returning far more than "$length" characters.
            $keep = \max(0, $length - (int) \mb_strlen($strAddOn));

            return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // same clamp as in the UTF-8 branch above
        $keep = \max(0, $length - (int) self::strlen($strAddOn));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6228
6229
    /**
     * Limit the number of characters in a string, cutting at a word boundary (space) where possible.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>Empty string if the input is empty or $length is not positive.</p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        // one slicing helper for both encodings; the non-UTF-8 helper may yield false
        $slice = static function (string $text, int $start, int $count) use ($isUtf8, $encoding) {
            return $isUtf8
                ? \mb_substr($text, $start, $count)
                : self::substr($text, $start, $count, $encoding);
        };

        $totalChars = $isUtf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
        if ($totalChars <= $length) {
            return $str;
        }

        // the limit falls exactly on a space: drop that space and append the add-on
        if ($slice($str, $length - 1, 1) === ' ') {
            return ((string) $slice($str, 0, $length - 1)) . $strAddOn;
        }

        $str = $slice($str, 0, $length);
        if ($str === false) {
            return '' . $strAddOn;
        }

        // remove the last (probably cut-off) word
        $words = \explode(' ', $str);
        \array_pop($words);
        $wholeWords = \implode(' ', $words);

        if ($wholeWords !== '') {
            return $wholeWords . $strAddOn;
        }

        // no complete word is left: fall back to a hard cut
        return ((string) $slice($str, 0, $length - 1)) . $strAddOn;
    }
6293
6294
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The shared prefix, or an empty string if the first characters differ.</p>
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $limit = (int) \min(\mb_strlen($str), \mb_strlen($otherStr));
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $limit = (int) \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));
        }

        $prefix = '';

        // compare character by character, stop at the first mismatch
        for ($pos = 0; $pos < $limit; ++$pos) {
            if ($isUtf8) {
                $left = \mb_substr($str, $pos, 1);
                $right = \mb_substr($otherStr, $pos, 1);
            } else {
                $left = self::substr($str, $pos, 1, $encoding);
                $right = self::substr($otherStr, $pos, 1, $encoding);
            }

            if ($left === false || $left !== $right) {
                break;
            }

            $prefix .= $left;
        }

        return $prefix;
    }
6352
6353
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first in $str.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // evaluated after the normalization on purpose: an alias that
        // normalizes to "UTF-8" takes the plain "mb_*" path as well
        $isUtf8 = $encoding === 'UTF-8';

        // extract the characters of $otherStr once instead of once per table cell
        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[] = $isUtf8
                ? \mb_substr($otherStr, $j, 1)
                : self::substr($otherStr, $j, 1, $encoding);
        }

        $len = 0; // length of the best match so far
        $end = 0; // 1-based end position of the best match within $str

        // Only the previous table row is needed to compute the current one.
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            $strChar = $isUtf8
                ? \mb_substr($str, $i - 1, 1)
                : self::substr($str, $i - 1, 1, $encoding);

            $currentRow = [0];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    $run = $previousRow[$j - 1] + 1;

                    // strictly greater: on ties the earlier match wins
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $currentRow[$j] = $run;
            }

            $previousRow = $currentRow;
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6437
6438
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The shared suffix, or an empty string if the last characters differ.</p>
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $limit = (int) \min(\mb_strlen($str, $encoding), \mb_strlen($otherStr, $encoding));
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $limit = (int) \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));
        }

        $suffix = '';

        // walk backwards from the end of both strings, stop at the first mismatch
        for ($back = 1; $back <= $limit; ++$back) {
            if ($isUtf8) {
                $left = \mb_substr($str, -$back, 1);
                $right = \mb_substr($otherStr, -$back, 1);
            } else {
                $left = self::substr($str, -$back, 1, $encoding);
                $right = self::substr($otherStr, -$back, 1, $encoding);
            }

            if ($left === false || $left !== $right) {
                break;
            }

            $suffix = $left . $suffix;
        }

        return $suffix;
    }
6499
6500
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is given without delimiters; it is wrapped in "/" delimiters and
     * evaluated with the "u" (UTF-8) modifier. Forward slashes inside the pattern
     * may be written escaped or unescaped.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // Escape every "/" that is not already escaped, otherwise it would end the
        // regex early. Escape pairs ("\" + any char) are consumed as one token and
        // left untouched, so an existing "\/" is not escaped twice.
        $escaped = \preg_replace_callback(
            '~\\\\.|/~s',
            static function (array $match): string {
                return $match[0] === '/' ? '\\/' : $match[0];
            },
            $pattern
        );

        if (!\is_string($escaped)) {
            // the escaping itself failed; never fall through to an empty pattern
            return false;
        }

        return (bool) \preg_match('/' . $escaped . '/u', $str);
    }
6512
6513
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        // negative offsets count from the end, so -$charCount is still valid
        return $offset < 0
            ? $charCount >= \abs($offset)
            : $charCount > $offset;
    }
6535
6536
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the length in the same encoding that is used to fetch the
        // character, so the bounds check and the lookup agree.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6565
6566
    /**
     * Pad a UTF-8 string to given length with another string.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of the known names
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // anything that is not already an integer must be one of the known names
        if ($pad_type !== (int) $pad_type) {
            $named_types = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($named_types[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $named_types[$pad_type];
        }

        // decided before the normalization: only a literal 'UTF-8' uses the plain "mb_*" functions
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);

        if ($pad_length < $str_length) {
            return $str;
        }

        $diff = $pad_length - $str_length;

        // repeats the pad string "$times" times and keeps the first "$chars" characters
        $build = static function (int $times, int $chars) use ($pad_string, $is_utf8, $encoding): string {
            $repeated = \str_repeat($pad_string, $times);

            return $is_utf8
                ? (string) \mb_substr($repeated, 0, $chars)
                : (string) self::substr($repeated, 0, $chars, $encoding);
        };

        if ($pad_type === \STR_PAD_BOTH) {
            // with an odd difference the extra character goes to the right side
            $left_chars = (int) \floor($diff / 2);
            $right_chars = (int) \ceil($diff / 2);

            $pre = $build($left_chars, $left_chars);
            $post = $build($right_chars, $right_chars);

            return $pre . $str . $post;
        }

        $ps_length = $is_utf8 ? (int) \mb_strlen($pad_string) : (int) self::strlen($pad_string, $encoding);
        $padding = $build((int) \ceil($diff / $ps_length), $diff);

        if ($pad_type === \STR_PAD_LEFT) {
            return $padding . $str;
        }

        // STR_PAD_RIGHT and every unknown integer value pad on the right
        return $str . $padding;
    }
6728
6729
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Alias for str_pad() with a $pad_type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
6748
6749
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Alias for str_pad() with a $pad_type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
6768
6769
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Alias for str_pad() with a $pad_type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
6788
6789
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6813
6814
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6859
6860
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * INFO: an empty $search matches nothing; in that case $replacement is
     * appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // covers every empty-$search case, including an empty $str
        if ($search === '') {
            return $str . $replacement;
        }

        $searchBytes = \strlen($search);

        // byte-wise "starts with" check
        if (\strncmp($str, $search, $searchBytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $searchBytes);
    }
6891
6892
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * INFO: an empty $search matches nothing; in that case $replacement is
     * appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        // covers every empty-$search case, including an empty $str
        if ($search === '') {
            return $str . $replacement;
        }

        $strBytes = \strlen($str);
        $searchBytes = \strlen($search);

        // A $search longer than $str can never match. Checking this first also keeps
        // the offset below inside the string (an out-of-range offset is an error).
        if ($searchBytes > $strBytes) {
            return $str;
        }

        // byte-wise "ends with" check
        if (\substr_compare($str, $search, -$searchBytes) === 0) {
            return \substr($str, 0, $strBytes - $searchBytes) . $replacement;
        }

        return $str;
    }
6923
6924
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @return string <p>The unchanged $subject if the position lookup yields false.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $firstPos = self::strpos($subject, $search);

        if ($firstPos === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $firstPos, $searchLength);
    }
6953
6954
    /**
     * Replace only the last occurrence of "$search" in "$subject" with "$replace".
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The text that takes the place of the last match.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string
     *                <p>The modified string, or "$subject" unchanged if "$search" was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $lastMatchAt = self::strrpos($subject, $search);
        if ($lastMatchAt === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $lastMatchAt, $searchLength);
    }
6985
6986
    /**
     * Returns the characters of the string in random order.
     *
     * PS: relies on "shuffle()", which is too weak for cryptographic purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // "UTF-8" is served by the native "mb_*" functions, everything else by the wrappers of this class
        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $charCount = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $charCount = (int) self::strlen($str, $encoding);
        }

        // shuffle the character positions, then collect the characters in that order
        $positions = \range(0, $charCount - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        $result = '';
        foreach ($positions as $position) {
            $char = $isUtf8
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
7032
7033
    /**
     * Returns the part of the string that begins at index $start and stops right
     * before index $end. Without $end the rest of the string is returned, a
     * negative $end is counted from the end of the string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string if a non-negative <i>end</i>
     *                      is not greater than <i>start</i>.</p><p>Whatever the underlying substring
     *                      function reports for an out-of-range <i>start</i> is passed through
     *                      (documented here as possibly <b>FALSE</b>).</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // "UTF-8" is served by the native "mb_*" functions, everything else by the wrappers of this class
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // a non-negative end index at or before the start selects nothing
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            // the total length is only needed for "until the end" and for negative end indexes
            $strLength = $isUtf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            $length = $end === null ? $strLength : $strLength + $end - $start;
        } else {
            $length = $end - $start;
        }

        if ($isUtf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7082
7083
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every number or upper-case letter starts a
     * new "_"-separated part (upper-case letters are lower-cased), repeated "_" are
     * collapsed and leading / trailing "_" are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // no "|" between the two properties: inside a character class it would be
            // a literal pipe and every "|" of the input would get a "_" prefix
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $matchInt = (int) $match;

                // a plain digit is isolated on both sides: "a1b" -> "a_1_b"
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // The patterns are applied one after the other. Leading and trailing whitespace
        // needs no pattern of its own: it has been turned into "_" by the first pattern
        // and is removed by the final trim.
        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            '_',
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7147
7148
    /**
     * Sort the characters of a string by their code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        // flipping keys and values twice leaves every value only once
        if ($unique) {
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if ($desc === false) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
7173
7174
    /**
     * Convert a string to an array of Unicode characters.
     *
     * An array input is processed element by element (recursively) and returned
     * with its keys preserved.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input; an empty array for an empty input
     *               or a $length lower than 1.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as &$v) {
                $v = self::str_split(
                    $v,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }
            // do not leave a dangling reference to the last element behind
            unset($v);

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short input: pick the characters one by one
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer input: let the regex engine do the split in one go
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand, bytes that do not
            // form one of the sequences below are skipped

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // group the single characters and glue every group back together;
            // the callback takes its argument by value because "array_map()" passes
            // values, a by-reference parameter can trigger a warning there
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7313
7314
    /**
     * Splits the string with the provided pattern and returns the parts as an
     * array of strings. An optional integer $limit truncates the result to at
     * most that many parts (the remainder is dropped).
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[]
     *                  <p>An array of strings; an empty array if $limit is 0 or the split fails.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // "mb_split()" reports a failure with false, which must neither be iterated
            // nor returned from a function that promises an array
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first $limit parts
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // Ask for one part more than requested, so that the unsplit remainder ends up
        // in an extra last element that can be dropped afterwards.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): "preg_quote()" makes this fallback treat $pattern as literal text,
        // while the "mb_split()" branch above interprets it as a regex - confirm which
        // behavior is intended before relying on regex syntax without mbstring.
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7375
7376
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive; an empty $needle always matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // every string, even an empty one, starts with the empty string
        if ($needle === '') {
            return true;
        }

        // compare only the first bytes of the haystack against the needle
        return $haystack !== '' && \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7396
7397
    /**
     * Returns true if the string begins with at least one of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the substrings; always false for an
     *              empty $str or an empty list.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
7425
7426
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything behind the first separator; an empty string if $str or
     *                $separator is empty or the separator does not occur.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // cut with the encoding-aware wrapper of this class, exactly like the other
        // "str_substr_*_separator" methods do, instead of handing the raw encoding
        // name to "mb_substr()"
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7465
7466
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything behind the last separator; an empty string if $str or
     *                $separator is empty or the separator does not occur.</p>
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // "UTF-8" is served by the native "mb_*" functions, everything else by the wrappers of this class
        $isUtf8 = $encoding === 'UTF-8';

        $lastSeparatorAt = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);
        if ($lastSeparatorAt === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $lastSeparatorAt + (int) \mb_strlen($separator));
        }

        $tailStart = $lastSeparatorAt + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $tailStart, null, $encoding);
    }
7505
7506
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything in front of the first separator; an empty string if $str or
     *                $separator is empty or the separator does not occur.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        // "UTF-8" is served by the native "mb_*" functions, everything else by the wrappers of this class
        $isUtf8 = $encoding === 'UTF-8';

        $firstSeparatorAt = $isUtf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);
        if ($firstSeparatorAt === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $firstSeparatorAt);
        }

        return (string) self::substr($str, 0, $firstSeparatorAt, $encoding);
    }
7549
7550
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything in front of the last separator; an empty string if $str or
     *                $separator is empty or the separator does not occur.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // "UTF-8" is served by the native "mb_*" functions, everything else by the wrappers of this class
        $isUtf8 = $encoding === 'UTF-8';

        $lastSeparatorAt = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);
        if ($lastSeparatorAt === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $lastSeparatorAt);
        }

        // the encoding name is normalized only for the final cut, after the search
        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $lastSeparatorAt, $normalizedEncoding);
    }
7592
7593
    /**
     * Gets the part of the string that starts at the first occurrence of "$needle"
     * (needle included), or - via "$beforeNeedle" - the part in front of it.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The requested part; an empty string if $str or $needle is empty or
     *                the needle does not occur.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "UTF-8" is served by the native "mb_*" function, everything else by the wrapper of this class
        $part = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        if ($part === false) {
            return '';
        }

        return $part;
    }
7637
7638
    /**
     * Gets the part of the string that starts at the last occurrence of "$needle"
     * (needle included), or - via "$beforeNeedle" - the part in front of it.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The requested part; an empty string if $str or $needle is empty or
     *                the needle does not occur.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "UTF-8" is served by the native "mb_*" function, everything else by the wrapper of this class
        $part = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        if ($part === false) {
            return '';
        }

        return $part;
    }
7682
7683
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
7695
7696
    /**
     * Capitalizes the first letter of every word and lower-cases the rest of it.
     * Words listed in $ignore are left exactly as they are. By default the input
     * is trimmed first.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are used as long as neither a language
        // nor the length-preserving mode is requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        $titleizeWord = static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $useMbFunction, $encoding): string {
            $word = $match[0];

            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if ($useMbFunction !== true) {
                $lowered = self::strtolower(
                    $word,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );

                return self::ucfirst(
                    $lowered,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper(\mb_substr($word, 0, 1))
                       . \mb_strtolower(\mb_substr($word, 1));
            }

            return \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding)
                   . \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);
        };

        // every run of non-whitespace characters counts as one word
        return (string) \preg_replace_callback('/([^\\s]+)/u', $titleizeWord, $str);
    }
7771
7772
    /**
     * Trim a string and convert it to human-friendly title case.
     *
     * "Small words" (articles, short prepositions, ...) are lower-cased unless
     * they open or close the title or a sub-phrase, or take part in a two-part
     * hyphenated compound. URLs, domains, e-mail addresses, file paths and
     * words with internal capitals (e.g. "iPhone") are kept as they are.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>Additional words not to capitalize. They are inserted unquoted into the
     *                         regular expressions, next to the built-in small words.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        $defaultSmallWords = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        // one big alternation: built-in small words first, caller-supplied ones after
        $smallWordsRx = \implode('|', \array_merge($defaultSmallWords, $ignore));
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Capitalize the small word captured in group 1.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperFirstGroup1 = static function (array $matches) use ($encoding): string {
            return static::str_upper_first($matches[1], $encoding);
        };

        /**
         * Keep group 1 untouched and capitalize the small word in group 2.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperFirstGroup2 = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::str_upper_first($matches[2], $encoding);
        };

        $str = \trim($str);

        // an input without any lowercase char is lower-cased first, so that it gets title-cased too
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // URLs, domains, emails and file paths are preserved
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // small words are lower-cased
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // words without internal caps are capitalized
                    $word = static::str_upper_first($matches[4], $encoding);
                } else {
                    // any other kind of word is preserved (iPhone)
                    $word = $matches[5];
                }

                // re-attach the leading and the trailing underscores
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \\b # ...followed by small word
                     ~uxi',
            $upperFirstGroup2,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $upperFirstGroup1,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $upperFirstGroup1,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $upperFirstGroup2,
            $str
        );

        return $str;
    }
7941
7942
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the input are read as one big-endian number and returned
     * as a string of "0" / "1" digits without leading zeros ("0" when the
     * input is empty or consists of NUL bytes only).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        /** @noinspection OffsetOperationsInspection */
        $hex = (string) $value[1];
        if ($hex === '') {
            return '0';
        }

        // Convert byte by byte: running "base_convert()" over the whole hex string
        // loses precision as soon as the number no longer fits into an integer,
        // which silently corrupted the result for longer inputs.
        $bits = '';
        foreach (\str_split($hex, 2) as $hexByte) {
            $bits .= \str_pad(\base_convert($hexByte, 16, 2), 8, '0', \STR_PAD_LEFT);
        }

        // keep the historical output format: no leading zeros, but never an empty string
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7960
7961
    /**
     * Split a string into its lines.
     *
     * A run of one or two "\r" / "\n" characters is treated as a single line break.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for an empty input and for a failed split
        $nothing = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        // only filter the lines if at least one of the filters was requested
        if ($removeEmptyValues !== false || $removeShortValues !== null) {
            return self::reduce_string_array(
                $lines,
                $removeEmptyValues,
                $removeShortValues
            );
        }

        return $lines;
    }
7999
8000
    /**
     * Convert a string into an array of words.
     *
     * The string is split with the words themselves as captured delimiters, so
     * the unfiltered result alternates between non-word parts (even indexes)
     * and words (odd indexes).
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        // result for an empty input and for a failed split
        $nothing = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        $wordChars = self::rxClass($charList, '\pL');

        $parts = \preg_split("/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $nothing;
        }

        if ($removeShortValues === null && $removeEmptyValues === false) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $removeEmptyValues,
            $removeShortValues
        );

        // make sure that every remaining value is a string
        foreach ($reduced as $key => $value) {
            $reduced[$key] = (string) $value;
        }

        return $reduced;
    }
8047
8048
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8063
8064
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If $substring alone is already longer than $length, only $substring is
     * returned (same behaviour as "UTF8::str_truncate_safe()").
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // the plain "mb_*" functions are only used if the caller passed exactly 'UTF-8'
        $useMbDirectly = $encoding === 'UTF-8';
        if ($useMbDirectly === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $strLength = $useMbDirectly
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // nothing to truncate
        if ($length >= $strLength) {
            return $str;
        }

        if ($substring !== '') {
            $substringLength = $useMbDirectly
                ? (int) \mb_strlen($substring)
                : (int) self::strlen($substring, $encoding);

            $remaining = $length - $substringLength;

            // A substring longer than the requested length must not turn a valid
            // length into a negative one: a negative length makes "substr" strip
            // chars from the END of the string, so the result became longer than
            // requested (e.g. "hello world", 2, "..." gave "hello worl...").
            if ($length >= 0 && $remaining < 0) {
                $remaining = 0;
            }

            $length = $remaining;
        }

        $truncated = $useMbDirectly
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);

        return (string) $truncated . $substring;
    }
8121
8122
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * If the string is empty, or no room is left for any char of it, only
     * $substring is returned.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ($length >= (int) self::strlen($str, $encoding)) {
                return $str;
            }

            // reserve the room that is needed for the substring
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $cut = self::substr($str, 0, $length, $encoding);
            if ($cut === false) {
                return '';
            }

            // a space directly behind the cut means that no word was split
            $nextSpace = self::strpos($str, ' ', $length - 1, $encoding);
            if ($nextSpace !== $length) {
                // the last word was split: go back to the last space before the cut
                $lastSpace = self::strrpos($cut, ' ', 0, $encoding);

                $mayDropSingleWord = $nextSpace !== false && $ignoreDoNotSplitWordsForOneWord === false;
                if ($lastSpace !== false || $mayDropSingleWord) {
                    $cut = (string) self::substr($cut, 0, (int) $lastSpace, $encoding);
                }
            }

            return $cut . $substring;
        }

        if ($length >= (int) \mb_strlen($str)) {
            return $str;
        }

        // reserve the room that is needed for the substring
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        $cut = \mb_substr($str, 0, $length);
        if ($cut === false) {
            return '';
        }

        // a space directly behind the cut means that no word was split
        $nextSpace = \mb_strpos($str, ' ', $length - 1);
        if ($nextSpace !== $length) {
            // the last word was split: go back to the last space before the cut
            $lastSpace = \mb_strrpos($cut, ' ', 0);

            $mayDropSingleWord = $nextSpace !== false && $ignoreDoNotSplitWordsForOneWord === false;
            if ($lastSpace !== false || $mayDropSingleWord) {
                $cut = (string) \mb_substr($cut, 0, (int) $lastSpace);
            }
        }

        return $cut . $substring;
    }
8216
8217
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * Implemented as "UTF8::str_delimit()" with "_" as delimiter.
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8231
8232
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized via "UTF8::str_camelize()" first, afterwards its
     * first char is upper-cased via "UTF8::ucfirst()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // only the string and the encoding are used for the camelize step
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
8254
8255
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::ucfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $result;
    }
8277
8278
    /**
     * Counts number of words in the UTF-8 string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        // the words are expected at the odd indexes, the parts between them at the even ones
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        if ($format !== 1 && $format !== 2) {
            return (int) (($partCount - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($index = 1; $index < $partCount; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // $format === 2: use the char-offset of every word as its key
        $offset = (int) self::strlen($parts[0]);
        for ($index = 1; $index < $partCount; $index += 2) {
            $words[$offset] = $parts[$index];
            $offset += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
        }

        return $words;
    }
8315
8316
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp()
     *
     * Both strings are case-folded via "UTF8::strtocasefold()" and then
     * compared via "UTF8::strcmp()".
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8337
8338
    /**
     * alias for "UTF8::strstr()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     *
     * @see UTF8::strstr()
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::strstr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );

        return $result;
    }
8360
8361
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are normalized to NFD before the byte-wise comparison, so
     * that canonically equivalent strings compare as equal. A string that
     * can not be normalized (e.g. invalid UTF-8) is compared as it is.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical strings need no normalization
        if ($str1 === $str2) {
            return 0;
        }

        // "Normalizer::normalize()" returns false on failure: passing that on to
        // "\strcmp()" either raises a TypeError or compares empty strings, so that
        // two different invalid strings were reported as equal.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        if ($normalized1 === false) {
            $normalized1 = $str1;
        }

        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);
        if ($normalized2 === false) {
            $normalized2 = $str2;
        }

        return \strcmp($normalized1, $normalized2);
    }
8383
8384
    /**
     * Find length of initial segment not matching mask.
     *
     * Returns the length of the part of $str (or of the slice selected by
     * $offset / $length) in front of the first char that is listed in $charList.
     *
     * @param string $str
     * @param string $charList
     * @param int    $offset
     * @param int    $length
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask, the whole string is the initial segment
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        if ($offset !== null || $length !== null) {
            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length === null) {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset, $length);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // group 1 lazily captures everything in front of the first char of the mask
        $matches = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
            // no char of the mask was found
            return (int) self::strlen($str, $encoding);
        }

        $segmentLength = self::strlen($matches[1], $encoding);

        return $segmentLength === false ? 0 : $segmentLength;
    }
8447
8448
    /**
     * alias for "UTF8::stristr()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     *
     * @see UTF8::stristr()
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::stristr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );

        return $result;
    }
8470
8471
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every element is converted via "UTF8::chr()", the results are concatenated.
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $toChar = [
            self::class,
            'chr',
        ];

        $chars = \array_map($toChar, $array);

        return \implode('', $chars);
    }
8493
8494
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The start of the string is compared against every BOM that is known as
     * a key of "self::$BOM".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Only the keys (the BOM byte sequences) are needed. The former by-reference
        // loop over the static property never used the values, but turned the shared
        // entries into references.
        foreach (\array_keys(self::$BOM) as $bomString) {
            if (\strpos($str, $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8514
8515
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str            <p>
     *                               The input string.
     *                               </p>
     * @param string $allowable_tags [optional] <p>
     *                               You can use the optional second parameter to specify tags which should
     *                               not be stripped.
     *                               </p>
     *                               <p>
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
     *                               can not be changed with allowable_tags.
     *                               </p>
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        // the cleanup is done before the tags are stripped
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the second argument is only passed on if the caller provided it
        return $allowable_tags === null
            ? \strip_tags($str)
            : \strip_tags($str, $allowable_tags);
    }
8551
8552
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * @param string $str
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        // a failed replacement (null) is cast to an empty string
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8569
8570
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * Depending on the available extensions, the search is done via
     * "mb_stripos()", "grapheme_stripos()", the native "stripos()" (ASCII-only
     * input) or a case-folded "UTF8::strpos()" call.
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // INFO: "grapheme_stripos()" can't handle other encodings
        //       and it can't handle a negative offset
        $canUseGrapheme = $encoding === 'UTF-8'
                          &&
                          $offset >= 0
                          &&
                          self::$SUPPORT['intl'] === true;

        if ($canUseGrapheme) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8645
8646
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case insensitive).
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if haystack / needle is empty
     *                      or needle is not found.<br>
     *                      If the needle only becomes empty through $cleanUtf8, the (cleaned) haystack
     *                      is returned.
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning can reduce the needle to an empty string. The comparison must be
        // strict: a loose "!$needle" check is also true for the valid needle "0"
        // and would return the whole haystack instead of searching for it.
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture everything in front of the first match
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip as many characters as were captured in front of the needle
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8729
8730
    /**
     * Count the characters of a string (not its bytes).
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string whose length is measured.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in $str for the given $encoding
     *                   (one multi-byte character counts as 1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process
     *                   invalid chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        $isUtf8 = $encoding === 'UTF-8';

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8 ? \mb_strlen($str) : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        if (
            !$isUtf8
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the single-character matches
        //

        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        // no match for a non-empty string is reported as a failure
        return $charCount === 0 ? false : $charCount;
    }
8844
8845
    /**
     * Get the length of a string in bytes.
     *
     * @param string $str <p>The string being measured.</p>
     *
     * @return int the number of bytes
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // With "mbstring.func_overload" active, "mb_" is available, so it is
        // used with the 8-BIT "CP850" charset instead of plain strlen().
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
8865
8866
    /**
     * Case-insensitive "natural order" comparison of two strings.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // case-fold both operands first, then delegate to the case-sensitive variant
        $foldedFirst = self::strtocasefold($str1, true, false, $encoding, null, false);
        $foldedSecond = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($foldedFirst, $foldedSecond);
    }
8887
8888
    /**
     * "Natural order" comparison of two strings.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            $foldedFirst = (string) self::strtonatfold($str1);
            $foldedSecond = (string) self::strtonatfold($str2);

            return \strnatcmp($foldedFirst, $foldedSecond);
        }

        // identical strings need no folding
        return 0;
    }
8914
8915
    /**
     * Case-insensitive comparison of the first n characters of two strings.
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The number of characters used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // case-fold both operands first, then delegate to the case-sensitive variant
        $foldedFirst = self::strtocasefold($str1, true, false, $encoding, null, false);
        $foldedSecond = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($foldedFirst, $foldedSecond, $len);
    }
8942
8943
    /**
     * Compare the first n characters of two strings.
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // other encodings go through the encoding-aware substr() of this class
        if ($encoding !== 'UTF-8') {
            return self::strcmp(
                (string) self::substr($str1, 0, $len, $encoding),
                (string) self::substr($str2, 0, $len, $encoding)
            );
        }

        // UTF-8: cut both strings directly via "mb_substr()"
        return self::strcmp(
            (string) \mb_substr($str1, 0, $len),
            (string) \mb_substr($str2, 0, $len)
        );
    }
8978
8979
    /**
     * Search a string for any character out of a set of characters.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string in which the characters of char_list are searched.</p>
     * @param string $char_list <p>The characters to look for. This parameter is case sensitive.</p>
     *
     * @return false|string
     *                      The part of haystack starting at the first character found,<br>
     *                      or <strong>false</strong> if an argument is empty or nothing is found
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        // match the first character that belongs to the character class built from $char_list
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $found)) {
            return false;
        }

        // cut the haystack at the byte position of the matched character
        $bytePos = (int) \strpos($haystack, $found[0]);

        return \substr($haystack, $bytePos);
    }
9001
9002
    /**
     * Find position of first occurrence of string in a string.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.
     *                              A negative offset counts from the end of the haystack.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string (always counted from the start of the haystack).<br>
     *                   If haystack / needle is empty or needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // search only in the part of the haystack that starts at $offset
        $haystackTail = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTail === false) {
            $haystackTail = '';
        }
        $haystackTail = (string) $haystackTail;

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack. Translate it into
            // the number of characters that were skipped, so that the result below is
            // still relative to the start of the full haystack (like "mb_strpos()").
            $offset = (int) self::strlen($haystack, $encoding) - (int) self::strlen($haystackTail, $encoding);
            if ($offset < 0) {
                $offset = 0;
            }
        }

        $pos = \strpos($haystackTail, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position inside the tail into a character position
            return $offset + (int) self::strlen(\substr($haystackTail, 0, $pos), $encoding);
        }

        return $offset + 0;
    }
9149
9150
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If haystack / needle is empty or needle is
     *                   not found, it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with the 8-BIT "CP850" charset
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
9179
9180
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects the portion of haystack that is returned.
     *                              If set to true, everything from the beginning of haystack
     *                              up to the last occurrence of needle is returned.
     *                              If set to false, everything from the last occurrence
     *                              of needle to the end is returned.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isSingleByte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($before_needle === false && $isSingleByte) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv or vanilla php: both search for the first character of the needle
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9302
9303
    /**
     * Reverse the order of the characters in a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // the input is passed through emoji_encode() here and emoji_decode() at the end
        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($i = (int) self::strlen($str, $encoding) - 1; $i >= 0; --$i) {
                $char = self::substr($str, $i, 1, $encoding);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($i = (int) \grapheme_strlen($str) - 1; $i >= 0; --$i) {
                $char = \grapheme_substr($str, $i, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } else {
            for ($i = (int) \mb_strlen($str) - 1; $i >= 0; --$i) {
                $char = \mb_substr($str, $i, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
9355
9356
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects the portion of haystack that is returned.
     *                              If set to true, everything from the beginning of haystack
     *                              up to the last occurrence of needle is returned.
     *                              If set to false, everything from the last occurrence
     *                              of needle to the end is returned.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php: search for the first character of the needle
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9431
9432
    /**
     * Locate the last case-insensitive occurrence of a needle and return its position.
     *
     * @param string     $haystack  <p>The string that is searched.</p>
     * @param int|string $needle    <p>The string to search for. A non-negative integer is converted
     *                              via UTF8::chr() before searching.</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> position of the last match,<br>or <strong>false</strong>
     *                   if the haystack or the needle is empty or nothing was found.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // an empty haystack cannot contain anything
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support an integer $needle, so convert it first
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }

        $needle = (string) $needle;
        if ($needle === '') {
            return false;
        }

        // mb_strripos() && iconv_strripos() are not tolerant to invalid characters
        if ($cleanUtf8 === true) {
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1st choice: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // binary || ascii only: the byte-wise native function is sufficient
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        // every remaining strategy only understands UTF-8, so warn about other charsets
        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 2nd choice: intl
        //

        // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset
        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // pure ascii input: the native function works as-is
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // last resort: case-fold both strings and delegate to the case-sensitive search
        //

        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $cleanUtf8
        );
    }
9542
9543
    /**
     * Byte-wise, case-insensitive search for the last occurrence of a string within another.
     *
     * @param string $haystack <p>
     *                         The string that is searched for the last occurrence of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack to start searching.
     *                         </p>
     *
     * @return false|int the numeric position of the last occurrence of needle in the haystack,
     *                   or false if needle is not found or one of the strings is empty
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // With "mbstring.func_overload" active, the "mb_" variant with an
        // 8-BIT charset ("CP850") is called instead of the native function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9574
9575
    /**
     * Locate the last occurrence of a needle inside a string and return its position.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string that is searched for the last occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> position of the last match,<br>or <strong>false</strong>
     *                   if the haystack or the needle is empty or nothing was found.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // an empty haystack cannot contain anything
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support an integer $needle, so convert it first
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }

        $needle = (string) $needle;
        if ($needle === '') {
            return false;
        }

        // \mb_strrpos && iconv_strrpos are not tolerant to invalid characters
        if ($cleanUtf8 === true) {
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1st choice: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // binary || ascii only: the byte-wise native function is sufficient
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrpos($haystack, $needle, $offset);
        }

        // every remaining strategy only understands UTF-8, so warn about other charsets
        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 2nd choice: intl
        //

        // INFO: "grapheme_strrpos()" can't handle a negative offset or other encodings
        if ($offset >= 0 && $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $graphemePos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // pure ascii input: the native function works as-is
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // last resort: vanilla php
        //

        // Apply the offset by trimming the haystack: a positive offset drops
        // the leading characters (and is added back to the result later), a
        // negative offset drops the trailing characters and is then reset.
        if ($offset !== 0) {
            if ($offset > 0) {
                $trimmed = self::substr($haystack, $offset);
            } else {
                $trimmed = self::substr($haystack, 0, $offset);
                $offset = 0;
            }

            if ($trimmed !== null) {
                // a failed substr() (false) becomes an empty haystack
                $haystack = (string) $trimmed;
            }
        }

        $bytePos = \strrpos($haystack, $needle);
        if ($bytePos === false) {
            return false;
        }

        // the byte position is turned into a character position by
        // measuring the length of everything in front of the match
        $prefix = \substr($haystack, 0, $bytePos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
9712
9713
    /**
     * Byte-wise search for the last occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string that is searched for the last occurrence of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int the numeric position of the last occurrence of needle in the haystack,
     *                   or false if needle is not found or one of the strings is empty
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // With "mbstring.func_overload" active, the "mb_" variant with an
        // 8-BIT charset ("CP850") is called instead of the native function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9743
9744
    /**
     * Measure the leading segment of a string that consists only of characters from a given mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>Start of the sub-string that is inspected.</p>
     * @param int    $length   [optional] <p>Length of the sub-string that is inspected.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     *                   The length of the matching segment, 0 if nothing matches or an input is empty
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // narrow the input down to the requested window first
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the run of mask characters anchored at the start of the string
        $found = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (\preg_match($pattern, $str, $found)) {
            return (int) self::strlen($found[0], $encoding);
        }

        return 0;
    }
9787
9788
    /**
     * Return the part of the haystack that starts at (or ends before) the first occurrence of the needle.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, the part of the haystack before the first
     *                              occurrence of the needle is returned (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      or one of the strings is empty
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_strpos()" and "iconv_strpos()" return a wrong position
        // if invalid characters are found in $haystack before $needle
        if ($cleanUtf8 === true) {
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1st choice: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // binary || ascii only: the byte-wise native function is sufficient
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // every remaining strategy only understands UTF-8, so warn about other charsets
        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 2nd choice: intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $graphemePart = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($graphemePart !== false) {
                return $graphemePart;
            }
        }

        //
        // pure ascii input: the native function works as-is
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // last resort: vanilla php
        //

        // lazily capture everything in front of the first occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);
        if (!isset($match[1])) {
            return false;
        }

        $head = $match[1];
        if ($before_needle) {
            return $head;
        }

        // skip the captured head, the remainder starts with the needle
        return self::substr($haystack, (int) self::strlen($head));
    }
9896
9897
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack is returned.
     *                              If set to true, everything from the beginning of haystack
     *                              to the first occurrence of needle is returned.
     *                              If set to false, everything from the first occurrence
     *                              of needle to the end is returned.
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found or one of the strings is empty
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // With "mbstring.func_overload" active, the "mb_" variant with an
        // 8-BIT charset ("CP850") is called instead of the native function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
9932
9933
    /**
     * Case-fold a string so that it can be used for case-less matching.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        // strip invalid byte sequences before any case conversion happens
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        $toLower = $lower === true;

        // plain UTF-8 without a language hint: call "mb_*" directly
        if ($lang === null && $encoding === 'UTF-8') {
            return $toLower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        // otherwise the encoding- and language-aware wrappers do the work
        return $toLower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
9985
9986
    /**
     * Convert all alphabetic characters of a string to lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        // strip invalid byte sequences before any case conversion happens
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // plain UTF-8 without a language hint: call "mb_strtolower()" directly
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language hint: always fallback via symfony polyfill
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        // a language hint needs intl, without it we warn and fall back
        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // load the list of transliterator ids only once
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliteratorId = $lang . '-Lower';
        if (\in_array($transliteratorId, self::$INTL_TRANSLITERATOR_LIST, true) === false) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliteratorId, $str);
    }
10055
10056
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences before any case conversion happens
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // plain UTF-8 without a language hint: call "mb_strtoupper()" directly
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // load the list of transliterator ids only once
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the language-specific transliterator is missing
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            // the warning names this method, so the origin of the message is traceable
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10125
10126
    /**
     * Translate characters or replace sub-strings.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into replace pairs
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // identical "from" and "to" cannot change anything
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);
            $countFrom = \count($fromChars);
            $countTo = \count($toChars);

            // cut the longer list, so that every "from" has exactly one "to"
            if ($countFrom > $countTo) {
                $fromChars = \array_slice($fromChars, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toChars = \array_slice($toChars, 0, $countFrom);
            }

            // Keep the result in its own variable, so that the exception
            // message below can still show the original "from" values.
            $pairs = \array_combine($fromChars, $toChars);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
            }

            return \strtr($str, $pairs);
        }

        // empty "to" + string "from": remove every occurrence of "from"
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        // empty "to" + array "from": the array is used as replace pairs
        return \strtr($str, $from);
    }
10173
10174
    /**
     * Calculate the display width of a string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // iconv and mbstring are not tolerant to invalid encoding
        // further, their behaviour is inconsistent with that of PHP's substr
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        //
        // 1st choice: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // last resort: vanilla php
        //

        // the regex below works on UTF-8 only
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove every character matched by the ranges below and count the
        // removals: each removed character counts twice, the rest counts once
        $wideCount = 0;
        $narrowOnly = (string) \preg_replace(
            '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u',
            '',
            $str,
            -1,
            $wideCount
        );

        return ($wideCount * 2) + (int) self::strlen($narrowOnly, 'UTF-8');
    }
10224
10225
    /**
10226
     * Get part of a string.
10227
     *
10228
     * @see http://php.net/manual/en/function.mb-substr.php
10229
     *
10230
     * @param string $str       <p>The string being checked.</p>
10231
     * @param int    $offset    <p>The first position used in str.</p>
10232
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
10233
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
10234
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10235
     *
10236
     * @return false|string
10237
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10238
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10239
     *                      characters long, <b>FALSE</b> will be returned.
10240
     */
10241 172
    public static function substr(
10242
        string $str,
10243
        int $offset = 0,
10244
        int $length = null,
10245
        string $encoding = 'UTF-8',
10246
        bool $cleanUtf8 = false
10247
    ) {
10248
        // empty string
10249 172
        if ($str === '' || $length === 0) {
10250 8
            return '';
10251
        }
10252
10253 168
        if ($cleanUtf8 === true) {
10254
            // iconv and mbstring are not tolerant to invalid encoding
10255
            // further, their behaviour is inconsistent with that of PHP's substr
10256 2
            $str = self::clean($str);
10257
        }
10258
10259
        // whole string
10260 168
        if (!$offset && $length === null) {
10261 7
            return $str;
10262
        }
10263
10264 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
10265 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
10266
        }
10267
10268
        //
10269
        // fallback via mbstring
10270
        //
10271
10272 163
        if (self::$SUPPORT['mbstring'] === true) {
10273 161
            if ($encoding === 'UTF-8') {
10274 161
                if ($length === null) {
10275 64
                    return \mb_substr($str, $offset);
10276
                }
10277
10278 102
                return \mb_substr($str, $offset, $length);
10279
            }
10280
10281
            return self::substr($str, $offset, $length, $encoding);
10282
        }
10283
10284
        //
10285
        // fallback for binary || ascii only
10286
        //
10287
10288
        if (
10289 4
            $encoding === 'CP850'
10290
            ||
10291 4
            $encoding === 'ASCII'
10292
        ) {
10293
            if ($length === null) {
10294
                return \substr($str, $offset);
10295
            }
10296
10297
            return \substr($str, $offset, $length);
10298
        }
10299
10300
        // otherwise we need the string-length
10301 4
        $str_length = 0;
10302 4
        if ($offset || $length === null) {
10303 4
            $str_length = self::strlen($str, $encoding);
10304
        }
10305
10306
        // e.g.: invalid chars + mbstring not installed
10307 4
        if ($str_length === false) {
10308
            return false;
10309
        }
10310
10311
        // empty string
10312 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
10313
            return '';
10314
        }
10315
10316
        // impossible
10317 4
        if ($offset && $offset > $str_length) {
10318
            return '';
10319
        }
10320
10321 4
        if ($length === null) {
10322 4
            $length = (int) $str_length;
10323
        } else {
10324 2
            $length = (int) $length;
10325
        }
10326
10327
        if (
10328 4
            $encoding !== 'UTF-8'
10329
            &&
10330 4
            self::$SUPPORT['mbstring'] === false
10331
        ) {
10332 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
10333
        }
10334
10335
        //
10336
        // fallback via intl
10337
        //
10338
10339
        if (
10340 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
10341
            &&
10342 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
10343
            &&
10344 4
            self::$SUPPORT['intl'] === true
10345
        ) {
10346
            $returnTmp = \grapheme_substr($str, $offset, $length);
10347
            if ($returnTmp !== false) {
10348
                return $returnTmp;
10349
            }
10350
        }
10351
10352
        //
10353
        // fallback via iconv
10354
        //
10355
10356
        if (
10357 4
            $length >= 0 // "iconv_substr()" can't handle negative length
10358
            &&
10359 4
            self::$SUPPORT['iconv'] === true
10360
        ) {
10361
            $returnTmp = \iconv_substr($str, $offset, $length);
10362
            if ($returnTmp !== false) {
10363
                return $returnTmp;
10364
            }
10365
        }
10366
10367
        //
10368
        // fallback for ascii only
10369
        //
10370
10371 4
        if (self::is_ascii($str)) {
10372
            return \substr($str, $offset, $length);
10373
        }
10374
10375
        //
10376
        // fallback via vanilla php
10377
        //
10378
10379
        // split to array, and remove invalid characters
10380 4
        $array = self::str_split($str);
10381
10382
        // extract relevant part, and join to make sting again
10383 4
        return \implode('', \array_slice($array, $offset, $length));
10384
    }
10385
10386
    /**
     * Multibyte-aware comparison of two strings from an offset, up to length characters.
     *
     * If an offset and/or a length is given, $str1 is first reduced to that
     * slice and $str2 is then truncated to the (character) length of the slice,
     * so both operands cover the same number of characters before comparing.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // slicing is only needed when a window was requested
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // Measure the slice in the SAME encoding that is used to cut $str2,
                // otherwise the character count is computed with the default charset
                // and $str2 is truncated at the wrong position.
                $sliceLength = (int) self::strlen($str1, $encoding);
                $str2 = (string) self::substr($str2, 0, $sliceLength, $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10439
10440
    /**
     * Count the number of substring occurrences.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack  <p>The string to search in.</p>
     * @param string   $needle    <p>The substring to search for.</p>
     * @param int      $offset    [optional] <p>The offset where to start counting.</p>
     * @param int|null $length    [optional] <p>
     *                            The maximum length after the specified offset to search for the
     *                            substring. A length of 0 always yields a count of 0.
     *                            </p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int the number of occurrences, or false if $haystack or $needle is an
     *                   empty string (or the length of the haystack could not be determined)
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // an empty search window cannot contain anything
        if ($length === 0) {
            return 0;
        }

        // 'UTF-8' and 'CP850' are used as-is, everything else gets normalized
        $isWellKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isWellKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters would shift the character positions,
            // so both operands are cleaned before any slicing / counting
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack, $encoding);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        $mbstringSupport = self::$SUPPORT['mbstring'];

        if ($mbstringSupport === false && $encoding !== 'UTF-8') {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if ($mbstringSupport === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via PCRE: every match of the quoted needle is one occurrence
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10521
10522
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @return false|int the number of times the needle substring occurs in the
     *                   haystack string; 0 if $haystack or $needle is empty
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        $funcOverloadActive = self::$SUPPORT['mbstring_func_overload'] === true;
        $windowRequested = ($offset || $length !== null);

        // with "mbstring.func_overload" the window is cut manually, first
        if ($windowRequested && $funcOverloadActive) {
            if ($length === null) {
                $lengthTmp = self::strlen($haystack);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            $windowEndsBeforeStart = $length !== 0
                                     && $offset !== 0
                                     && ($length + $offset) <= 0;
            if (
                $windowEndsBeforeStart
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $slice = \substr($haystack, $offset, $length);
            $haystack = $slice === false ? '' : (string) $slice;
        }

        if ($funcOverloadActive) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        return $length === null
            ? \substr_count($haystack, $needle, $offset)
            : \substr_count($haystack, $needle, $offset, $length);
    }
10599
10600
    /**
     * Returns the number of occurrences of $substring in the given string.
     *
     * The comparison is case-sensitive by default; pass false as $caseSensitive
     * to count case-insensitively.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int the number of occurrences; 0 if $str or $substring is empty
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$caseSensitive) {
                // bring both operands into the same case-folded form
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        if (!$caseSensitive) {
            // UTF-8 fast path: compare the upper-cased variants
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
10646
10647
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack
     *                if it does not start with $needle
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // nothing to strip, or the haystack does not start with the needle
        if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // drop as many characters as the needle is long
        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10671
10672
    /**
     * Get part of a string, processed in bytes (not characters).
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters; whatever the underlying
     *                      "substr()" / "mb_substr()" call returns for out-of-range values
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // an empty input or a zero length always results in an empty string
        if ($length === 0 || $str === '') {
            return '';
        }

        // no window requested: the whole string is the result
        if ($length === null && !$offset) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no explicit length: use a very large value to read up to the end
        $byteLength = $length ?? 2147483647;

        return \substr($str, $offset, $byteLength);
    }
10703
10704
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string the haystack without the suffix, or the unchanged haystack
     *                if it does not end with $needle
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // nothing to strip, or the haystack does not end with the needle
        if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything except the last "length of needle" characters
        $haystackLength = (int) self::strlen($haystack);
        $suffixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystackLength - $suffixLength);
    }
10728
10729
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case sensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack
     *                if it does not start with $needle
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // nothing to strip, or the haystack does not start with the needle
        if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // drop as many characters as the needle is long
        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10753
10754
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given (and $str is a string), then it will
     *                                     default to strlen( string ); i.e. end the replacing at the end of
     *                                     string. Of course, if length is zero then this function will have the
     *                                     effect of inserting replacement into string at the given start
     *                                     offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement: one entry per input string
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries fall back to 0
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            // NOTE(review): a missing length becomes 0 (= insert) for array input,
            // while string input defaults to "replace up to the end" - kept as-is
            // for backward compatibility, confirm whether this is intended.
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call per element; the encoding is forwarded explicitly,
            // otherwise every element would be processed with the default "UTF-8"
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single string can only take a single replacement: use the first one
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, a missing / too large length means "up to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // the replaced window must not reach beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into arrays of characters
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10904
10905
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case sensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the haystack without the suffix, or the unchanged haystack
     *                if it does not end with $needle
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        // byte-wise check: do the last bytes of the haystack equal the needle?
        $endsWithNeedle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$endsWithNeedle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        return (string) self::substr(
            $haystack,
            0,
            (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding),
            $encoding
        );
    }
10946
10947
    /**
     * Returns a case swapped version of the string.
     *
     * For UTF-8 input every character is handled on its own: a character that
     * changes when lower-cased is replaced by its lower-case form, every other
     * character by its upper-case form. This stays correct even when a case
     * mapping changes the byte length of a character (e.g. "ſ" -> "S"), where
     * a byte-wise XOR of the lower-/upper-cased strings produces broken output.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove invalid characters before the case of each character is swapped
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            // split into single characters; returns false for invalid UTF-8
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);

            if ($chars === false) {
                // invalid UTF-8: keep the legacy byte-wise behaviour
                return (string) (\mb_strtolower($str) ^ \mb_strtoupper($str) ^ $str);
            }

            $swapped = '';
            foreach ($chars as $char) {
                $lower = \mb_strtolower($char, 'UTF-8');

                // the character has a different lower-case form -> it was upper-case,
                // otherwise use the upper-case form (a no-op for caseless characters)
                $swapped .= $lower !== $char ? $lower : \mb_strtoupper($char, 'UTF-8');
            }

            return $swapped;
        }

        // NOTE(review): this byte-wise XOR only works while the lower-/upper-cased
        // strings keep the byte layout of the input - confirm for multi-byte encodings.
        return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
    }
10974
10975
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed to be in use if the "mbstring" or "iconv" extension
     * is not loaded, but one of its functions ("mb_strlen" / "iconv") exists anyway.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false
                                && \function_exists('mb_strlen');

        $iconvIsPolyfilled = \extension_loaded('iconv') === false
                             && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
10998
10999
    /**
     * Replaces every tab character in the string with $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @return string the string with all tabs replaced by spaces
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
11017
11018
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without "$tryToKeepStringLength" the conversion is
     * done via "mb_convert_case()", otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // remove invalid characters before converting the case
            $str = self::clean($str);
        }

        // language specific rules / length preservation need the custom implementation
        $needsCustomTitleize = $lang !== null || $tryToKeepStringLength !== false;
        if ($needsCustomTitleize) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
    }
11055
11056
    /**
     * Deprecated alias for "UTF8::to_ascii()": all arguments are passed through unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr [optional] <p>Forwarded as second argument of "UTF8::to_ascii()". Default: ?</p>
     * @param bool   $strict    [optional] <p>Forwarded as third argument of "UTF8::to_ascii()".</p>
     *
     * @return string the result of "UTF8::to_ascii()"
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11072
11073
    /**
     * Deprecated alias for "UTF8::to_iso8859()": the argument is passed through unchanged.
     *
     * @param string|string[] $str <p>The input string or an array of strings.</p>
     *
     * @return string|string[] the result of "UTF8::to_iso8859()"
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11087
11088
    /**
     * Deprecated alias for "UTF8::to_latin1()": the argument is passed through unchanged.
     *
     * @param string|string[] $str <p>The input string or an array of strings.</p>
     *
     * @return string|string[] the result of "UTF8::to_latin1()"
     *
     * @see UTF8::to_latin1()
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
11102
11103
    /**
     * Deprecated alias for "UTF8::to_utf8()": the argument is passed through unchanged.
     *
     * @param string|string[] $str <p>The input string or an array of strings.</p>
     *
     * @return string|string[] the result of "UTF8::to_utf8()"
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11117
11118
    /**
     * Convert a string into ASCII.
     *
     * Processing order:
     * <ul>
     * <li>Empty and pure-ASCII input is returned unchanged.</li>
     * <li>The string is passed through "UTF8::clean()" and returned if it is ASCII afterwards.</li>
     * <li>With "$strict" and a loaded intl extension, "transliterator_transliterate()" is tried first.</li>
     * <li>Every remaining non-ASCII character is decoded to its code point and looked up in the
     * transliteration table of its "bank" (code point >> 8), which is loaded on demand via
     * "getDataIfExists()" and cached for the lifetime of the process.</li>
     * </ul>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character used if a character is unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // per-process cache: bank number => transliteration table
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        // if the match fails there is no "0" entry; fall back to an empty list
        $chars = $ar[0] ?? [];

        /** @noinspection ForeachSourceInspection */
        foreach ($chars as &$c) {
            $lead = self::$ORD[$c[0]];

            // ASCII - next please
            if ($lead <= 127) {
                continue;
            }

            // a continuation byte (128-191) or 254 / 255 can never start a sequence
            if ($lead < 192 || $lead >= 254) {
                $c = $unknown;

                continue;
            }

            // the lead byte determines the sequence length and the value to subtract from it
            if ($lead <= 223) {
                $length = 2;
                $offset = 192;
            } elseif ($lead <= 239) {
                $length = 3;
                $offset = 224;
            } elseif ($lead <= 247) {
                $length = 4;
                $offset = 240;
            } elseif ($lead <= 251) {
                $length = 5;
                $offset = 248;
            } else {
                $length = 6;
                $offset = 252;
            }

            // The code point is rebuilt from scratch for every character, so a value decoded
            // for a previous character can never leak into the lookup below.
            $ord = $lead - $offset;
            for ($i = 1; $i < $length; ++$i) {
                $ord = $ord * 64 + (self::$ORD[$c[$i]] - 128);
            }

            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    // remember missing banks too, so the lookup is not repeated
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            /** @noinspection NullCoalescingOperatorCanBeUsedInspection */
            if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
                $c = $UTF8_TO_ASCII[$bank][$newchar];
            } else {
                $c = $unknown;
            }
        }
        // break the reference so later writes to $c cannot modify the last element
        unset($c);

        return \implode('', $chars);
    }
11279
11280
    /**
     * Interpret a value as a boolean.
     *
     * The value is cast to a string and evaluated in this order:
     * <ul>
     * <li>an empty string is false</li>
     * <li>"true", "1", "on", "yes" are true and "false", "0", "off", "no" are false (case-insensitive)</li>
     * <li>any other numeric string is true only if it is greater than zero</li>
     * <li>everything else is the boolean value of the trimmed string</li>
     * </ul>
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keywords = [
            'true'  => true,
            'false' => false,
            '1'     => true,
            '0'     => false,
            'on'    => true,
            'off'   => false,
            'yes'   => true,
            'no'    => false,
        ];

        // all keywords are lowercase, so one lower-cased lookup also covers exact matches
        $normalized = \strtolower($value);
        if (isset($keywords[$normalized])) {
            return $keywords[$normalized];
        }

        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        return (bool) \trim($value);
    }
11321
11322
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Every character except letters a-z / A-Z, digits, ".", "-", whitespace and the fallback
     * character is removed. Whitespace runs are then replaced by the fallback character, repeated
     * fallback characters are collapsed, and the fallback character is trimmed from both ends.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate <p>No transliteration, conversion etc. is done by default - unsafe
     *                                  characters are simply removed. If true, the string is first passed
     *                                  through "UTF8::str_transliterate()".</p>
     * @param string $fallback_char     <p>Replacement for whitespace. With an empty string, whitespace is
     *                                  removed.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\\.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars
            '/[\\s]+/u',                                           // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // An empty fallback char would produce the invalid pattern "/[]+/u". "preg_replace()" then
        // returns null and the whole filename would be lost, so step 3 is skipped in that case.
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/u'; // 3) remove double $fallback_char's
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11357
11358
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively) with their keys kept; any other
     * value is cast to a string and passed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
11382
11383
    /**
     * Alias that forwards to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11396
11397
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences ("\uXXXX", including surrogate pairs) and, on request,
     * html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for
     * this case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $result = '';
        $pos = 0;

        while ($pos < $length) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                if (($lead & "\xC0") === "\x80") { // needs conversion
                    $result .= self::to_utf8_convert_helper($lead);
                } else { // it doesn't need conversion
                    $result .= $lead;
                }

                ++$pos;

                continue;
            }

            // number of continuation bytes the lead byte announces (0 === doesn't look like UTF8)
            if ($lead <= "\xDF") {
                $expected = 1;
            } elseif ($lead <= "\xEF") {
                $expected = 2;
            } elseif ($lead <= "\xF7") {
                $expected = 3;
            } else {
                $expected = 0;
            }

            // collect the announced bytes; a byte past the end of the input counts as "\x00"
            $sequence = $lead;
            $looksLikeUtf8 = $expected > 0;
            for ($offset = 1; $looksLikeUtf8 && $offset <= $expected; ++$offset) {
                $next = $pos + $offset >= $length ? "\x00" : $str[$pos + $offset];
                $looksLikeUtf8 = $next >= "\x80" && $next <= "\xBF";
                $sequence .= $next;
            }

            if ($looksLikeUtf8) { // yeah, almost sure it's UTF8 already
                $result .= $sequence;
                $pos += $expected + 1;
            } else { // not valid UTF8 - convert only the lead byte
                $result .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $codePoint = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $codePoint = ((int) \hexdec($matches[1]) << 10)
                                 + (int) \hexdec($matches[2])
                                 + 0x10000
                                 - (0xD800 << 10)
                                 - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($codePoint < 0x80) {
                    return (string) self::chr($codePoint);
                }

                if ($codePoint < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($codePoint >> 6)) . (string) self::chr(0x80 | ($codePoint & 0x3F));
                }

                return self::decimal_to_chr($codePoint);
            },
            $result
        );

        if ($result === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decodeHtmlEntityToUtf8 === true) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
11530
11531
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original function if the string / chars are 7-bit only,
     * but the check for ASCII (7-bit) costs more time than we can save here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null or an empty string
     *                           strips whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly: a loose truthiness check treats the char list "0" as "not given"
        // and would strip whitespace instead of zeros.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11564
11565
    /**
     * Makes string's first char uppercase.
     *
     * The string is split into its first character and the rest; only the first character is
     * upper-cased. The plain "mb_strtoupper()" is used unless a language or
     * "$tryToKeepStringLength" is given, in which case "UTF8::strtoupper()" is used.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        $plainMbUppercase = $lang === null && $tryToKeepStringLength === false;

        if ($encoding === 'UTF-8') {
            $head = (string) \mb_substr($str, 0, 1);
            $tail = (string) \mb_substr($str, 1);

            if ($plainMbUppercase === true) {
                return \mb_strtoupper($head) . $tail;
            }

            return self::strtoupper($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($plainMbUppercase === true) {
            $head = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $head = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );
        }

        return $head . $tail;
    }
11634
11635
    /**
     * Alias that forwards to "UTF8::ucfirst()".
     *
     * Only the first three arguments are forwarded, so "UTF8::ucfirst()" runs with its defaults
     * for "$lang" and "$tryToKeepStringLength".
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
11650
11651
    /**
     * Uppercase for all words in the string.
     *
     * Pure-ASCII input without a charlist and without exceptions is handled by the native
     * "ucwords()". Otherwise the string is split with "UTF8::str_to_words()" and every word that
     * is not listed in "$exceptions" is passed to "UTF8::ucfirst()".
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // compare explicitly: the string "0" is falsy but must not be turned into ""
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // explicit comparison, so that a charlist / exception of "0" is not ignored
        $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // nothing to uppercase in "" (and "0" stays "0" anyway)
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // break the reference so later writes to $word cannot modify the last element
        unset($word);

        return \implode('', $words);
    }
11712
11713
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible, i.e. until a pass no longer changes the
     *                             string.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without any of these markers there is nothing to decode
        $needsDecoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needsDecoding = true;

                break;
            }
        }

        if ($needsDecoding === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $withoutEntities = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );
            $str = self::fix_simple_utf8(\urldecode($withoutEntities));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
11770
11771
    /**
     * Return an array that maps "urlencoded" win1252 bytes ("%20" - "%FF") to UTF-8.
     *
     * The entries are ordered by byte value. Bytes without a mapped character map to an empty string.
     *
     * @return string[]
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        // printable ASCII ("%20" - "%7E") maps to the identical single-byte character
        $map = [];
        for ($byte = 0x20; $byte <= 0x7E; ++$byte) {
            $map[\sprintf('%%%02X', $byte)] = \chr($byte);
        }

        // the keys are disjoint, so "+" only appends the remaining entries in order
        return $map + [
            '%7F' => '',
            '%80' => '€', // Windows-1252 0x80 is the euro sign
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
12007
12008
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * The string is processed byte-wise and rewritten in place:
     * - 2-byte sequences are converted to a single byte if the code point is below 256,
     *   otherwise they are replaced by "?",
     * - 3-byte and 4-byte sequences are always replaced by "?",
     * - every other byte is copied as it is.
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>
     *                              If true, the unchanged input is returned whenever the decoded string
     *                              is not shorter than the input, measured with "UTF8::strlen()".
     *                              </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';
        // $i is the read position, $j the write position ($j never overtakes $i)
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the lead byte tells us the length of the sequence
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // truncated 2-byte sequence at the very end of the input:
                    // do not read behind the end of the string
                    if (!isset($str[$i + 1])) {
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: cannot be represented, skip the trailing bytes
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        // cut off the bytes that are left over from the in-place rewrite
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12077
12078
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Thin wrapper around the global "utf8_encode()" that always returns a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The encoded string, or an empty string if the input is empty or the encoding failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // a polyfill of "utf8_encode()" may return false instead of a string
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        $encoded = \utf8_encode($str);

        return $encoded === false ? '' : $encoded;
    }
12102
12103
    /**
     * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The result of "UTF8::fix_simple_utf8()" for the given string.</p>
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12116
12117
    /**
     * Returns the table of all known UTF-8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  <p>The whitespace characters as values and the type of whitespace as keys,
     *                  as defined in the URL above.</p>
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12130
12131
    /**
     * Limit the number of words in a string.
     *
     * A "word" is a run of non-whitespace characters. If the string is cut,
     * trailing whitespace is removed before the add-on is appended.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The maximum number of words, values below 1 result in an empty string.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     *                <p>The input itself if it has no more than $limit words, otherwise the shortened string.</p>
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1) {
            return '';
        }

        if ($str === '') {
            return '';
        }

        \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $found);

        // no match at all (e.g. only whitespace or a regex error): nothing to shorten
        $head = $found[0] ?? null;
        if ($head === null) {
            return $str;
        }

        // the match is anchored at the start, so equal length means nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $strAddOn;
    }
12158
12159
    /**
     * Wraps a string to a given number of characters.
     *
     * The input is split into characters and mirrored into an ASCII "mask"
     * (" " for a space, "?" for any other character, "#" for an existing break).
     * The mask is wrapped with the native "wordwrap()" and the resulting break
     * positions are then applied to the real characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column,
     *                or an empty string if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // build the list of real characters and the ASCII mask in parallel
        $chars = [];
        $mask = '';
        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex) {
                // re-insert the break that "explode()" removed (as one list entry)
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $result = '';
        $charIndex = 0;
        $breakPos = -1;
        $maskIndex = -1;
        $mask = \wordwrap($mask, $width, '#', $cut);

        $maskLength = \mb_strlen($mask);
        while (($breakPos = \mb_strpos($mask, '#', $breakPos + 1)) !== false) {
            // copy all characters up to the next break position
            for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
                $result .= $chars[$charIndex];
                unset($chars[$charIndex++]);

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($maskIndex > $maskLength) {
                    break 2;
                }
            }

            // the break replaces an existing break or a space, so drop that character
            if (
                $break === $chars[$charIndex]
                ||
                $chars[$charIndex] === ' '
            ) {
                unset($chars[$charIndex++]);
            }

            $result .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($breakPos > $maskLength) {
                break;
            }
        }

        // whatever is left belongs to the last line
        return $result . \implode('', $chars);
    }
12245
12246
    /**
     * Line-Wrap the string after $width, but split the string by "$delimiter" before ...
     *    ... so that every line is wrapped on its own.
     *
     * The wrapped lines are joined again with the given delimiter, or with "\n"
     * if no delimiter was given.
     *
     * @param string      $str           <p>The input string.</p>
     * @param int         $width         [optional] <p>The column width.</p>
     * @param string      $break         [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut           [optional] <p>
     *                                   If the cut is set to true, the string is
     *                                   always wrapped at or before the specified width. So if you have
     *                                   a word that is larger than the given width, it is broken apart.
     *                                   </p>
     * @param bool        $addFinalBreak [optional] <p>
     *                                   If this flag is true, then the method will add a $break at the end
     *                                   of the result string.
     *                                   </p>
     * @param string|null $delimiter     [optional] <p>
     *                                   You can change the default behavior, where we split the string by newline
     *                                   ("\r\n", "\r" or "\n").
     *                                   </p>
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $addFinalBreak = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrappedLines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrappedLines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $finalBreak = $addFinalBreak ? $break : '';

        return \implode($glue, $wrappedLines) . $finalBreak;
    }
12297
12298
    /**
     * Returns the list of Unicode White Space characters.
     *
     * @return string[]
     *                  <p>An array with the numeric code point as key and the White Space character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12307
12308
    /**
     * Replaces characters via the case-folding tables.
     *
     * The common table (self::$COMMON_CASE_FOLD) is always applied; the full table
     * from the "caseFolding_full" data file is applied in addition if requested.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase (only the strict value true counts).</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $toLower = $useLower === true;

        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $toLower
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // the full table is loaded only once per request
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        if ($toLower) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12349
12350
    /**
     * Loads a data file from "/data/*.php" and returns the value the file returns.
     *
     * The existence of the file is not checked here,
     * see "getDataIfExists()" for a variant that does.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return array
     */
    private static function getData(string $file): array
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12364
12365
    /**
     * Loads a data file from "/data/*.php", but only if the file exists.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return false|mixed
     *                     <p>The value returned by the included file, or false if the file does not exist.</p>
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
12383
12384
    /**
     * Lazily fills the emoji caches (keys, values and the reversible placeholder keys).
     *
     * The emoji table is sorted by key length (longest first) before the caches are built.
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they were already initialized.</p>
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // longest keys first
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // one placeholder per key, built from the crc32 checksum of the key
        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12414
12415
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @return bool
     *              <p>true if the constant "MB_OVERLOAD_STRING" exists and its bit is set
     *              in the INI directive "mbstring.func_overload".</p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $funcOverload = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($funcOverload & \MB_OVERLOAD_STRING);
    }
12432
12433
    /**
     * Filters a list of strings and returns the remaining values as a re-indexed list.
     *
     * @param array    $strings           <p>The strings to filter.</p>
     * @param bool     $removeEmptyValues <p>Remove values that are empty after "trim()".</p>
     * @param int|null $removeShortValues <p>
     *                                    Remove values with a length (via "mb_strlen()") less than or equal
     *                                    to this number, null disables this filter.
     *                                    </p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        // init
        $return = [];

        // iterate by value: the input is never modified, so no reference is needed
        foreach ($strings as $str) {
            if (
                $removeShortValues !== null
                &&
                \mb_strlen($str) <= $removeShortValues
            ) {
                continue;
            }

            if (
                $removeEmptyValues === true
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
12467
12468
    /**
     * Builds a regex fragment that matches any of the characters of $s.
     *
     * The characters are collected in one bracket expression ("[...]"); a character
     * that cannot be added to the bracket expression is added as an own alternative
     * of a non-capturing group ("(?:[...]|x)"). The result is cached per
     * combination of $s and $class.
     *
     * @param string $s     <p>The characters the fragment has to match.</p>
     * @param string $class <p>Content that is placed in the bracket expression as it is (not quoted).</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASS_CACHE[$cacheKey])) {
            return $RX_CLASS_CACHE[$cacheKey];
        }

        // index 0 holds the content of the bracket expression, all other entries are alternatives
        $classArray = [$class];

        // iterate by value and with an own variable: the result of a function call
        // is no valid reference target and the parameter $s must not be overwritten
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a leading "-" is a literal inside of a bracket expression
                $classArray[0] = '-' . $classArray[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long
                $classArray[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one character with three or more bytes
                $classArray[0] .= $char;
            } else {
                $classArray[] = $char;
            }
        }

        // NOTE: this is a truthiness check, so the string "0" is not wrapped in brackets
        if ($classArray[0]) {
            $classArray[0] = '[' . $classArray[0] . ']';
        }

        if (\count($classArray) === 1) {
            $return = $classArray[0];
        } else {
            $return = '(?:' . \implode('|', $classArray) . ')';
        }

        $RX_CLASS_CACHE[$cacheKey] = $return;

        return $return;
    }
12516
12517
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names by $delimiter and upper-cases the first character of every part, except for:
     * - parts that are exactly one of the special names (e.g. "von", "de"),
     * - parts that start with one of the special prefixes (e.g. "mc", "d'"),
     * - parts that start with one of the special names, if the delimiter is "-".
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The delimiter that is used to split and to re-join the parts.</p>
     * @param string $encoding  <p>The encoding that is passed to "UTF8::strpos()".</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // init
        $namesArray = \explode($delimiter, $names);

        if ($namesArray === false) {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        foreach ($namesArray as &$name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            $continue = false;

            if ($delimiter === '-') {
                foreach ($specialCases['names'] as $beginning) {
                    if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                        $continue = true;

                        // one match is enough
                        break;
                    }
                }
            }

            if ($continue === false) {
                foreach ($specialCases['prefixes'] as $beginning) {
                    if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                        $continue = true;

                        // one match is enough
                        break;
                    }
                }
            }

            if ($continue === true) {
                continue;
            }

            // NOTE(review): $encoding is not forwarded to "UTF8::ucfirst()" -- confirm whether it should be
            $name = self::ucfirst($name);
        }
        // break the reference to the last element
        unset($name);

        return \implode($delimiter, $namesArray);
    }
12607
12608
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and all non-spacing marks ("\p{Mn}")
     * are removed afterwards, e.g. "é" becomes "e".
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null
     *                     <p>null if "preg_replace()" failed.</p>
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12623
12624
    /**
     * Converts one single-byte character into its UTF-8 representation.
     *
     * Characters found in the Windows-1252 table are mapped via that table,
     * everything else is converted into a 2-byte UTF-8 sequence.
     *
     * @param int|string $input <p>The character to convert, it is used as key for the "ord" lookup table.</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // The bitwise operators below are applied to strings (byte-wise) on purpose.
            //
            // Lead byte: the bits above the lower six, marked with "110xxxxx". The shift
            // replaces a division by 64, which produced a float array key that was
            // truncated implicitly (deprecated since PHP 8.1).
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
            // continuation byte: the lower six bits, marked with "10xxxxxx"
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12658
12659
    /**
     * Converts non-standard "%uXXXX" escapes (3 or 4 hex digits) into
     * hexadecimal HTML entities ("&#xXXXX;").
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The converted string, or the unchanged input if it contains no such escape.</p>
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        $pattern = '/%u([0-9a-fA-F]{3,4})/';

        if (!\preg_match($pattern, $str)) {
            return $str;
        }

        return (string) \preg_replace($pattern, '&#x\\1;', $str);
    }
12673
}
12674