Passed
Push — master ( 2d4bff...43944f )
by Lars
03:45
created

UTF8   F

Complexity

Total Complexity 1558

Size/Duplication

Total Lines 11191
Duplicated Lines 0 %

Test Coverage

Coverage 83.14%

Importance

Changes 0
Metric Value
eloc 3810
dl 0
loc 11191
ccs 2613
cts 3143
cp 0.8314
rs 0.8
c 0
b 0
f 0
wmc 1558

291 Methods

Rating   Name   Duplication   Size   Complexity  
A add_bom_to_string() 0 7 2
A __construct() 0 3 1
A access() 0 11 3
A chr_to_decimal() 0 30 6
A str_substr_after_first_separator() 0 20 4
A file_has_bom() 0 8 2
A max() 0 14 3
A str_camelize() 0 23 2
A parse_str() 0 20 5
A filter_input() 0 9 2
A str_contains() 0 16 6
B str_to_lines() 0 24 7
B substr_in_byte() 0 26 7
A array_change_key_case() 0 20 5
A get_unique_string() 0 15 2
A is_bom() 0 9 3
A is_hexadecimal() 0 3 1
A strnatcasecmp() 0 5 1
A encode_mimeheader() 0 25 5
A substr_left() 0 19 5
A count_chars() 0 3 1
F strlen() 0 99 21
A str_isubstr_last() 0 16 4
A ctype_loaded() 0 3 1
A str_replace_beginning() 0 21 6
A has_uppercase() 0 3 1
A remove_left() 0 12 2
C stripos() 0 57 15
A str_offset_exists() 0 10 2
F strrchr() 0 92 20
A to_filename() 0 24 2
A str_iends_with() 0 7 3
A max_chr_width() 0 8 2
A isBinary() 0 3 1
F utf8_decode() 0 77 16
A ltrim() 0 15 4
D is_utf8() 0 148 32
A remove_html() 0 3 1
A str_longest_common_suffix() 0 16 3
C wordwrap() 0 50 13
A ucfirst() 0 22 3
A lcword() 0 8 1
A str_pad_both() 0 5 1
A str_index_last() 0 7 1
A str_substr_last() 0 16 4
A mbstring_loaded() 0 8 2
A str_limit() 0 15 4
F chr() 0 88 21
A html_escape() 0 6 1
A toUTF8() 0 3 1
A string() 0 10 1
C normalize_encoding() 0 132 14
B rxClass() 0 39 8
A apply_padding() 0 19 5
C get_file_type() 0 86 12
A str_ensure_right() 0 7 2
A chr_to_int() 0 3 1
B str_titleize_for_humans() 0 127 5
C is_utf16() 0 63 16
A isHtml() 0 3 1
C filter() 0 55 13
A normalize_whitespace() 0 30 6
A str_starts_with() 0 7 3
A isBase64() 0 3 1
A str_humanize() 0 15 1
A is_html() 0 12 2
A decode_mimeheader() 0 19 6
C substr_count_in_byte() 0 50 15
A html_decode() 0 3 1
A strchr() 0 3 1
A strichr() 0 3 1
A isUtf32() 0 3 1
A str_index_first() 0 7 1
A strlen_in_byte() 0 16 4
A str_ireplace_ending() 0 21 6
A rtrim() 0 15 4
B str_longest_common_substring() 0 40 8
A regex_replace() 0 15 3
A chunk_split() 0 3 1
A titlecase() 0 7 3
A getData() 0 4 1
A str_iindex_first() 0 7 1
B strtolower() 0 46 10
A urldecode() 0 27 4
A str_isubstr_before_first_separator() 0 16 4
A strrev() 0 13 3
A replace_all() 0 7 2
F substr_replace() 0 107 26
A removeBOM() 0 3 1
A strstr_in_byte() 0 16 5
A str_matches_pattern() 0 3 1
A is_alpha() 0 3 1
A str_titleize() 0 36 5
A ws() 0 3 1
A get_random_string() 0 25 4
A str_replace_first() 0 8 2
A fix_utf8() 0 20 4
A toLatin1() 0 3 1
A str_pad_right() 0 3 1
B ucwords() 0 51 10
A first_char() 0 12 3
A to_boolean() 0 33 4
D stristr() 0 63 18
A isUtf8() 0 3 1
A strncasecmp() 0 6 1
B strwidth() 0 40 8
A css_stripe_media_queries() 0 6 1
A trim() 0 15 4
A clean() 0 46 6
A is_serialized() 0 11 3
A str_upper_camelize() 0 3 1
A is_uppercase() 0 3 1
A substr_compare() 0 25 6
F substr_count() 0 72 19
A strnatcmp() 0 3 2
B str_pad() 0 58 11
A str_ireplace() 0 18 3
A to_latin1() 0 3 1
A str_replace_ending() 0 21 6
A string_has_bom() 0 9 3
B strtr() 0 30 7
B str_contains_all() 0 22 7
A is_ascii() 0 7 2
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 20 4
B range() 0 46 11
B strspn() 0 15 7
A strcasecmp() 0 5 1
A str_transliterate() 0 3 1
A rawurldecode() 0 27 4
B str_capitalize_name_helper() 0 78 10
A utf8_encode() 0 31 6
A normalize_msword() 0 19 4
C str_detect_encoding() 0 115 14
A spaces_to_tabs() 0 3 1
A str_istarts_with() 0 7 3
A is_blank() 0 3 1
A str_replace() 0 3 1
A substr_iright() 0 19 5
D getCharDirection() 0 109 119
A htmlspecialchars() 0 7 3
A replace() 0 7 2
A filter_var_array() 0 9 2
A decimal_to_chr() 0 3 1
A to_iso8859() 0 16 4
A words_limit() 0 21 5
A strip_tags() 0 11 3
A pcre_utf8_support() 0 4 1
A between() 0 24 5
A str_isubstr_before_last_separator() 0 16 4
A str_truncate_safe() 0 27 6
A codepoints() 0 29 4
A substr_right() 0 19 5
A lowerCaseFirst() 0 8 1
A str_split() 0 3 1
A str_ends_with_any() 0 13 4
A chr_map() 0 5 1
A strrpos_in_byte() 0 16 5
A cleanup() 0 25 2
F strrpos() 0 116 27
A remove_right() 0 11 2
A remove_html_breaks() 0 3 1
A showSupport() 0 11 3
A char_at() 0 3 1
A remove_invisible_characters() 0 19 3
A single_chr_html_encode() 0 19 6
A chars() 0 3 1
A str_replace_last() 0 8 2
A str_iindex_last() 0 7 1
A str_substr_before_last_separator() 0 20 4
C is_binary() 0 46 12
A intlChar_loaded() 0 3 1
A strtocasefold() 0 19 3
A lcfirst() 0 16 2
A tabs_to_spaces() 0 3 1
A finfo_loaded() 0 3 1
A str_truncate() 0 20 3
F strripos() 0 91 20
A strpos_in_byte() 0 16 5
A str_ends_with() 0 7 3
A fits_inside() 0 3 1
F to_ascii() 0 149 28
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
A reduce_string_array() 0 26 6
A str_longest_common_prefix() 0 16 3
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 3 1
A str_substr_first() 0 21 4
A html_stripe_empty_tags() 0 6 1
A chr_size_list() 0 22 4
A remove_bom() 0 21 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 17 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 20 4
A str_isubstr_after_first_separator() 0 20 4
D extract_text() 0 106 20
A json_loaded() 0 3 1
A isBom() 0 3 1
A str_snakeize() 0 37 2
A int_to_chr() 0 3 1
A is_lowercase() 0 3 1
A str_sort() 0 15 3
F to_utf8() 0 88 32
A ucword() 0 3 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A getDataIfExists() 0 9 2
A hasBom() 0 3 1
A toAscii() 0 3 1
A str_capitalize_name() 0 7 1
A str_limit_after_word() 0 30 6
A iconv_loaded() 0 3 1
B lcwords() 0 39 7
A str_upper_first() 0 3 1
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 5
A filter_var() 0 9 2
A substr_ileft() 0 19 5
A is_empty() 0 3 1
B html_encode() 0 39 7
A str_dasherize() 0 3 1
A isUtf16() 0 3 1
A str_ensure_left() 0 7 2
F encode() 0 132 39
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
C is_utf32() 0 63 16
F ord() 0 64 17
A is_alphanumeric() 0 3 1
A strtonatfold() 0 4 1
A json_decode() 0 14 3
A fix_simple_utf8() 0 19 4
B strcspn() 0 23 7
A checkForSupport() 0 41 4
A is_json() 0 24 6
A fixStrCaseHelper() 0 33 5
A int_to_hex() 0 7 2
B str_split_pattern() 0 31 7
F strstr() 0 87 19
A has_lowercase() 0 3 1
A json_encode() 0 14 3
A str_isubstr_first() 0 21 4
A is_base64() 0 13 4
A str_last_char() 0 9 3
A str_ireplace_beginning() 0 21 6
A hex_to_int() 0 14 3
A htmlentities() 0 19 3
A hex_to_chr() 0 3 1
A str_substr_before_first_separator() 0 20 4
F substr() 0 139 31
A isJson() 0 3 1
A wordwrap_per_line() 0 15 3
A str_surround() 0 3 1
A strncmp() 0 6 1
A filter_input_array() 0 9 2
A str_insert() 0 12 2
A getSupportInfo() 0 15 4
A utf8_fix_win1252_chars() 0 3 1
B replace_diamond_question_mark() 0 41 7
A chr_to_hex() 0 11 3
A str_delimit() 0 15 1
A to_utf8_convert_helper() 0 27 5
B strtoupper() 0 46 10
A min() 0 14 3
A collapse_whitespace() 0 4 1
D html_entity_decode() 0 82 18
A str_starts_with_any() 0 17 5
B strrichr() 0 49 11
D split() 0 112 27
A str_istarts_with_any() 0 17 5
A str_contains_any() 0 13 4
A remove_duplicates() 0 14 4
A str_slice() 0 13 5
F strpos() 0 134 31
A str_shuffle() 0 12 2
A strcmp() 0 6 2
B file_get_contents() 0 54 10
A str_word_count() 0 23 5
A strripos_in_byte() 0 16 5
A str_to_binary() 0 5 1
A callback() 0 3 1
A symfony_polyfill_used() 0 16 5
A binary_to_str() 0 12 3
A bom() 0 3 1
B str_to_words() 0 33 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Known byte order marks, mapped to their length in bytes.
     *
     * Each BOM is listed twice: once as raw bytes and once as the UTF-8 text
     * that results when those raw bytes were misread as "WINDOWS-1252"
     * (in that form one visible character may occupy more than one byte).
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // written as escaped bytes ("ï»¿" in UTF-8) so that the key cannot be
        // silently stripped by editors or tools that treat it as a BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): the two leading characters look like spaces here but would be NUL bytes in a real misread BOM — confirm
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        // NOTE(review): same question for the two trailing characters — confirm
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $UTF8_MSWORD;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $BROKEN_UTF8_FIX;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $WIN1252_TO_UTF8;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $CHR;
219
220
    /**
     * Creating an instance runs the environment support check.
     */
    public function __construct()
    {
        self::checkForSupport();
    }
227
228
    /**
     * Pick a single character out of a string by its position ($str[1]-like access).
     *
     * @param string $str <p>A UTF-8 string.</p>
     * @param int    $pos <p>Zero-based position of the wanted character.</p>
     *
     * @return string the character at that position; an empty string for empty input
     *                or a negative position
     */
    public static function access(string $str, int $pos): string
    {
        // nothing to pick from, or a position in front of the string
        if ($str === '' || $pos < 0) {
            return '';
        }

        $character = self::substr($str, $pos, 1);

        return (string) $character;
    }
248
249
    /**
     * Make sure a string starts with the UTF-8 byte order mark.
     *
     * INFO: A string that already carries a BOM is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string, prefixed with the UTF-8 BOM when it had none
     */
    public static function add_bom_to_string(string $str): string
    {
        $hasBom = self::string_has_bom($str);
        if ($hasBom !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
266
267
    /**
     * Changes the case of all string keys in an array.
     *
     * Integer keys carry no case and are kept as they are; the values are never touched.
     * If two keys become identical after the conversion, the later entry overwrites the earlier one.
     *
     * @param array $array <p>The array to work on</p>
     * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                     or <strong>CASE_LOWER</strong> (default); any other value is treated
     *                     as <strong>CASE_LOWER</strong></p>
     *
     * @return array the input values, keyed by the lower or uppercased keys
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER): array
    {
        // everything that is not an explicit CASE_UPPER falls back to lower-casing
        $toUpper = $case === \CASE_UPPER;

        $return = [];
        foreach ($array as $key => $value) {
            // PHP turns numeric string keys into integers; this file runs under
            // strict_types, so only real strings are handed to the string helpers
            if (\is_string($key)) {
                $key = $toUpper
                    ? self::strtoupper($key)
                    : self::strtolower($key);
            }

            $return[$key] = $value;
        }

        return $return;
    }
297
298
    /**
     * Extract the text that sits between two delimiters.
     *
     * The search for $start begins at $offset; $end is searched for behind the
     * found $start delimiter. An empty string is returned when either delimiter
     * is missing or when nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
    {
        $startPos = self::strpos($str, $start, $offset, $encoding);
        if ($startPos === false) {
            return '';
        }

        // first character behind the start delimiter
        $contentStart = $startPos + self::strlen($start, $encoding);
        $endPos = self::strpos($str, $end, $contentStart, $encoding);

        if ($endPos !== false && $endPos !== $contentStart) {
            $content = self::substr($str, $contentStart, $endPos - $contentStart, $encoding);

            return $content === false ? '' : $content;
        }

        // end delimiter not found, or it directly follows the start delimiter
        return '';
    }
336
337
    /**
     * Convert a string of binary digits ("0" / "1") into the bytes it represents.
     *
     * Characters other than "0" and "1" are ignored and leading zero bits are
     * dropped. The remaining bits are left-padded to whole bytes, so e.g. "1010"
     * becomes "\n" (0x0A). The conversion works bit-wise, therefore inputs of any
     * length are converted without loss of precision.
     *
     * @param mixed $bin 1|0
     *
     * @return string the decoded bytes; an empty string if the input has no first
     *                offset (e.g. empty string) or contains no set bit
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        // keep only binary digits and drop leading zeros
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', (string) $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to a multiple of 8 so that every chunk below is one full byte
        $missingBits = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $missingBits) . $bits;

        $str = '';
        foreach (\str_split($bits, 8) as $byteBits) {
            $str .= \chr((int) \bindec($byteBits));
        }

        return $str;
    }
357
358
    /**
     * Returns the UTF-8 Byte Order Mark.
     *
     * INFO: take a look at UTF8::$BOM for e.g. the UTF-16 and UTF-32 BOM values
     *
     * @return string the three bytes of the UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8Bom = "\xef\xbb\xbf";

        return $utf8Bom;
    }
369
370
    /**
     * @alias of UTF8::chr_map()
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback <p>The callback, forwarded unchanged.</p>
     * @param string       $str      <p>The string, forwarded unchanged.</p>
     *
     * @return string[] whatever UTF8::chr_map() returns
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
384
385
    /**
     * Returns the single character found at $index (indexes start at 0).
     *
     * @param string $str
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $character = self::substr($str, $index, 1, $encoding);

        // cast, because substr() is not guaranteed to hand back a string
        return (string) $character;
    }
398
399
    /**
     * Breaks a string up into its characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] the result of UTF8::str_split() with a chunk length of 1
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str, 1);

        return $characters;
    }
410
411
    /**
     * Detects once which UTF-8 related extensions / features the server offers
     * and remembers the outcome in self::$SUPPORT.
     *
     * INFO: You don't need to run it manually, it will be triggered if it's needed.
     */
    public static function checkForSupport()
    {
        // the detection already ran, keep the stored results
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // stays an empty list unless "intl" can actually provide the ids
        self::$SUPPORT['intl__transliterator_list_ids'] = [];
        if (self::$SUPPORT['intl'] === true && \function_exists('transliterator_list_ids') === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
        }

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
    }
459
460
    /**
     * Builds the character that belongs to a code point.
     *
     * Code points up to 127 are read from the "chr" data table, everything above
     * is created via IntlChar when it is available and otherwise assembled byte
     * by byte from the same table. Results are memoized per code point + encoding.
     *
     * INFO: opposite to UTF8::ord()
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // memo of the characters that were already generated
        static $CHAR_CACHE = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // only these encodings are exempt from the "mbstring is missing" warning
        $exemptFromWarning = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
        if ($exemptFromWarning === false && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"
            // load the lookup table on first use
            if (self::$CHR === null && ($chrTable = self::getData('chr'))) {
                self::$CHR = (array) $chrTable;
            }

            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            // load the lookup table on first use
            if (self::$CHR === null && ($chrTable = self::getData('chr'))) {
                self::$CHR = (array) $chrTable;
            }

            // assemble the UTF-8 byte sequence by hand
            $code_point = (int) $code_point;
            if ($code_point <= 0x7F) {
                $chr = self::$CHR[$code_point];
            } elseif ($code_point <= 0x7FF) {
                $chr = self::$CHR[($code_point >> 6) + 0xC0]
                       . self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                $chr = self::$CHR[($code_point >> 12) + 0xE0]
                       . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                       . self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                $chr = self::$CHR[($code_point >> 18) + 0xF0]
                       . self::$CHR[(($code_point >> 12) & 0x3F) + 0x80]
                       . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                       . self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        // the character was built as UTF-8, convert it if another encoding was requested
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        $CHAR_CACHE[$cacheKey] = $chr;

        return $chr;
    }
559
560
    /**
     * Runs a callback over every piece UTF8::split() produces for the string.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback, one entry per piece
     */
    public static function chr_map($callback, string $str): array
    {
        return \array_map($callback, self::split($str));
    }
574
575
    /**
     * Lists, character by character, how many bytes each character of a string occupies.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $characters = self::split($str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // with "mbstring.func_overload" active, use the class's own byte-length helper instead of plain strlen()
        $byteLength = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function ($data) {
                return self::strlen_in_byte($data);
            }
            : '\strlen';

        return \array_map($byteLength, $characters);
    }
610
611
    /**
     * Get a decimal code representation of a specific character.
     *
     * Only the first character of the input is evaluated: the lead byte decides
     * how many bytes are read (1 to 4). An empty string yields 0. If the
     * input ends before the announced number of bytes, the missing continuation
     * bytes are treated as zero bits instead of reading beyond the string.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        // there is no first byte to inspect
        if ($char === '') {
            return 0;
        }

        $code = self::ord($char[0]);

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        $bytes = 1;
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 1; $i < $bytes; ++$i) {
            // 10xxxxxx
            $continuation = isset($char[$i])
                ? self::ord($char[$i]) & ~0x80
                : 0; // truncated sequence
            $code = ($code << 6) + $continuation;
        }

        return $code;
    }
649
650
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional] <p>Prefix handed on to UTF8::int_to_hex(), "U+" by default.</p>
     *
     * @return string The code point encoded as U+xxxx, or an empty string for empty input
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the "&#0;" entity is looked up as an empty string
        $lookup = $char === '&#0;' ? '' : $char;

        return self::int_to_hex(self::ord($lookup), $pfix);
    }
670
671
    /**
     * alias for "UTF8::chr_to_decimal()"
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr <p>The character, forwarded unchanged.</p>
     *
     * @return int whatever UTF8::chr_to_decimal() returns
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
684
685
    /**
     * Cuts a string into chunks and glues the chunks together with the given line ending.
     *
     * INFO: The line ending is placed between the chunks only, nothing is appended after the last chunk.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::split($body, $chunklen);

        return \implode($end, $chunks);
    }
698
699
    /**
     * Drops every byte that is not part of a structurally well-formed UTF-8 sequence and
     * optionally applies further clean-up steps.
     *
     * The optional steps run in this fixed order: diamond question mark, invisible characters,
     * whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true to remove the UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true to normalize the whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true to normalize MS Word chars
     *                                              e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true to keep non-breaking-spaces, only
     *                                              used in combination with $normalize_whitespace.</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true to remove the diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false to keep invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string the cleaned string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        $pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';

        // Only capture group 1 (the well-formed runs) is written back, so the single
        // bytes matched by group 2 and group 3 are dropped.
        $cleaned = (string) \preg_replace($pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
766
767
    /**
     * Repairs simple UTF-8 encoding errors (via UTF8::fix_simple_utf8()) and then runs
     * UTF8::clean() with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the cleaned string, or an empty string if the input casts to an empty string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // fix ISO <-> UTF-8 errors first
        $repaired = self::fix_simple_utf8($input);

        return self::clean(
            $repaired,
            true,  // $remove_bom
            true,  // $normalize_whitespace
            false, // $normalize_msword
            true,  // $keep_non_breaking_space
            true,  // $replace_diamond_question_mark
            true   // $remove_invisible_characters
        );
    }
801
802
    /**
     * Converts a string, or an array of characters, into an array of Unicode code points.
     *
     * A string is split via UTF8::split() first; every element is then mapped through UTF8::ord().
     *
     * INFO: opposite to UTF8::string()
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If true, every code point is additionally mapped through
     *                                 UTF8::int_to_hex(); by default the code points are
     *                                 returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $chars = \is_string($arg) ? self::split($arg) : $arg;

        $codePoints = \array_map([self::class, 'ord'], $chars);

        if (\count($codePoints) === 0) {
            return [];
        }

        if (!$u_style) {
            return $codePoints;
        }

        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
846
847
    /**
     * Replaces every run of "[[:space:]]" characters with a single space (via UTF8::regex_replace())
     * and trims the result (via UTF8::trim()).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the trimmed string with condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

        return self::trim($condensed);
    }
862
863
    /**
     * Counts how often each character occurs in a string.
     *
     * The string is split into single characters via UTF8::split() and the
     * occurrences are counted with array_count_values().
     *
     * @param string $str       <p>The input string.</p>
     * @param bool   $cleanUtf8 [optional] <p>Passed on to UTF8::split() as its clean-up flag.</p>
     *
     * @return int[] an associative array with the characters as keys and
     *               their count as values
     */
    public static function count_chars(string $str, bool $cleanUtf8 = false): array
    {
        $chars = self::split($str, 1, $cleanUtf8);

        return \array_count_values($chars);
    }
876
877
    /**
     * Removes CSS "@media" blocks from a string.
     *
     * @param string $str <p>The CSS input.</p>
     *
     * @return string the input without the matched media-query blocks
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // modifiers: m = multiline, i = case-insensitive, s = dot matches newline, U = ungreedy
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $stripped;
    }
892
893
    /**
     * Reports whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
903
904
    /**
     * Converts an int-value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#...;") which is then
     * decoded via UTF8::html_entity_decode().
     *
     * @param mixed $int <p>The decimal code point.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $entity = '&#' . $int . ';';

        return self::html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5);
    }
915
916
    /**
     * Decodes a MIME header field.
     *
     * iconv_mime_decode() is used when the iconv support flag is set,
     * otherwise mb_decode_mimeheader() is used.
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, $encoding = 'UTF-8')
    {
        if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['iconv'] !== true) {
            // fallback without iconv
            if ($encoding !== 'UTF-8') {
                $str = self::encode($encoding, $str);
            }

            return \mb_decode_mimeheader($str);
        }

        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
946
947
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
     *        encoding, so it can be called on a UTF-8 string as well.
     *
     * Besides real charsets the pseudo encodings 'JSON', 'BASE64' and 'HTML-ENTITIES' are handled,
     * both as target and as source encoding.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>If true, the source encoding is detected via
     *                                       UTF8::str_detect_encoding(); a detected encoding replaces
     *                                       $fromEncoding, and if nothing is detected the result of
     *                                       UTF8::to_utf8() is returned.</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @return string
     */
    public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
    {
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        // these two names are used as-is, everything else is normalized first
        $knownNames = ['UTF-8', 'CP850'];

        if (\in_array($toEncoding, $knownNames, true) === false) {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && \in_array($fromEncoding, $knownNames, true) === false) {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // pseudo encodings: a pseudo target returns right away, a pseudo source is
        // decoded and the source encoding is reset, so that the detection below kicks in
        if ($toEncoding === 'JSON') {
            return self::json_encode($str);
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        $detectedEncoding = ($autodetectFromEncoding || !$fromEncoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $detectedEncoding, $str, "\n\n");

        if ($detectedEncoding !== false) {
            $fromEncoding = $detectedEncoding;
        } elseif ($autodetectFromEncoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        if (
            $toEncoding === 'UTF-8'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        if (
            \in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true) === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            // only a warning: the iconv() fallback at the end is still tried
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            if ($converted) {
                return $converted;
            }
        }

        $viaIconv = \iconv($fromEncoding, $toEncoding, $str);

        return $viaIconv !== false ? $viaIconv : $str;
    }
1096
1097
    /**
     * Encodes a string as a MIME header field via iconv_mime_encode(), using an empty field name.
     *
     * @param string $str              <p>The header value to encode.</p>
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding ("scheme").</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed ("line-break-chars").</p>
     * @param int    $indent           [optional] <p>Set the max line length ("line-length").</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // 'UTF-8' and 'CP850' are used as-is, every other charset name is normalized first
        if (\in_array($fromCharset, ['UTF-8', 'CP850'], true) === false) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if (\in_array($toCharset, ['UTF-8', 'CP850'], true) === false) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1137
1138
    /**
     * Create an extract from a sentence; if the search-string is found, the extract is
     * (roughly) centered around it.
     *
     * Without a search-string the text is cut at the first ' ' / '.' position at or after
     * ($length - 1) and the replacer is appended.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round(self::strlen($str, $encoding) / 2, 0);
        }

        // Strict comparison on purpose: empty() would also treat the search-string "0"
        // as "no search-string".
        if ($search === '') {
            $stringLength = self::strlen($str, $encoding);

            if ($length > 0) {
                $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
            } else {
                $end = 0;
            }

            // NOTE(review): min() is fed with the raw results of both lookups, so a lookup
            // without a hit (presumably false) influences the result - existing behavior, kept as-is.
            $pos = (int) \min(
                self::strpos($str, ' ', $end, $encoding),
                self::strpos($str, '.', $end, $encoding)
            );

            if ($pos) {
                $strSub = self::substr($str, 0, $pos, $encoding);
                if ($strSub === false) {
                    return '';
                }

                return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
            }

            return $str;
        }

        $wordPos = self::stripos($str, $search, 0, $encoding);
        // position where the text left of the search-string should start to be skipped
        $halfSide = (int) ($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = self::substr($str, 0, $halfSide, $encoding);
            if ($halfText !== false) {
                // start the extract at the last ' ' / '.' in front of the search-string
                $pos_start = (int) \max(
                    self::strrpos($halfText, ' ', 0, $encoding),
                    self::strrpos($halfText, '.', 0, $encoding)
                );
            }
        }

        if ($wordPos && $halfSide > 0) {
            $l = $pos_start + $length - 1;
            $realLength = self::strlen($str, $encoding);

            if ($l > $realLength) {
                $l = $realLength;
            }

            $pos_end = (int) \min(
                    self::strpos($str, ' ', $l, $encoding),
                    self::strpos($str, '.', $l, $encoding)
                ) - $pos_start;

            if (!$pos_end || $pos_end <= 0) {
                // No usable end position: take the rest of the string. The length is the one
                // that was measured with $encoding, so the same charset is used throughout.
                $strSub = self::substr($str, $pos_start, $realLength, $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
                } else {
                    $extract = '';
                }
            } else {
                $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            }
        } else {
            // search-string not found (or found right at the beginning): cut from the start
            $l = $length - 1;
            $trueLength = self::strlen($str, $encoding);

            if ($l > $trueLength) {
                $l = $trueLength;
            }

            $pos_end = \min(
                self::strpos($str, ' ', $l, $encoding),
                self::strpos($str, '.', $l, $encoding)
            );

            if ($pos_end) {
                $strSub = self::substr($str, 0, $pos_end, $encoding);
                if ($strSub !== false) {
                    $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
                } else {
                    $extract = '';
                }
            } else {
                $extract = $str;
            }
        }

        return $extract;
    }
1256
1257
    /**
     * Reads entire file into a string and (optionally) converts the content to UTF-8.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. The name is passed through
     *                                        filter_var() with FILTER_SANITIZE_STRING before it is used.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed on to the native file_get_contents().
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is set,
     *                                        a context with an "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout (only used if no
     *                                        $context was given).</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data or false on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // init
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - kept here to
        // preserve the existing sanitizing of the filename.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);

        // filter_var() reports a failure with false, do not hand that over as a filename
        if ($filename === false) {
            return false;
        }

        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // the length argument is only passed if it was really given
        if (\is_int($maxLength) === true) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 === true) {
            // binary data stays untouched, unless it is UTF-16 or UTF-32
            $isPlainBinary = self::is_binary($data, true) === true
                             &&
                             self::is_utf16($data, false) === false
                             &&
                             self::is_utf32($data, false) === false;

            if ($isPlainBinary === false) {
                $data = self::encode('UTF-8', $data, false, $fromEncoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1350
1351
    /**
     * Checks if the content of a file starts with a BOM (Byte Order Mark).
     *
     * The file is read with the native file_get_contents() and checked via UTF8::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1370
1371
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are processed recursively (element by element / property by property),
     * strings are normalized, every other type is returned unchanged.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>The form that is passed on to the "Normalizer" class.</p>
     * @param string $leading_combining  <p>Prefix for strings that start with a combining mark (\p{Mn}).</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = 4 /* n::NFC */, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        if ($type === 'array') {
            foreach ($var as $key => $value) {
                /** @noinspection AlterInForeachInspection */
                $var[$key] = self::filter($value, $normalization_form, $leading_combining);
            }

            return $var;
        }

        if ($type === 'object') {
            foreach ($var as $key => $value) {
                $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
            }

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // pure ASCII needs no normalization
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        /** @noinspection PhpUndefinedClassInspection */
        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // placeholder, so that the isset() check below passes for already normalized strings
            $n = '-';
        } else {
            /** @noinspection PhpUndefinedClassInspection */
            $n = \Normalizer::normalize($var, $normalization_form);

            // an unusable normalizer result means: re-encode the string instead
            $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
        }

        if (
            $var[0] >= "\x80"
            &&
            isset($n[0], $leading_combining[0])
            &&
            \preg_match('/^\p{Mn}/u', $var)
        ) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1436
1437
    /**
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it and passes
     * the result through UTF8::filter().
     *
     * @see  http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              Only passed on to the native function if it was given explicitly.
     *                              </p>
     *
     * @return mixed the result of the native filter_input() call, passed through UTF8::filter()
     */
    public static function filter_input(int $type, string $variable_name, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // the 4th argument is only forwarded if the caller really passed it
        $var = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($var);
    }
1475
1476
    /**
     * "filter_input_array()"-wrapper which normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Fetches several external variables at once, optionally filters them
     * and then passes the result through "UTF8::filter()".
     *
     * @see  http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a filter type, or an array
     *                          optionally specifying the filter, flags and options. If the value is an
     *                          array, valid keys are "filter" (the filter type), "flags" (flags that apply
     *                          to the filter) and "options" (options that apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all values in
     *                          the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // with only "$type" given, let PHP apply its own defaults for the remaining arguments
        $values = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
1521
1522
    /**
     * "filter_var()"-wrapper which normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with the specified filter and then passes the result through "UTF8::filter()".
     *
     * @see  http://php.net/manual/en/function.filter-var.php
     *
     * @param mixed $variable <p>
     *                        Value to filter.
     *                        </p>
     * @param int   $filter   [optional] <p>
     *                        The ID of the filter to apply. The
     *                        manual page lists the available filters.
     *                        </p>
     * @param mixed $options  [optional] <p>
     *                        Associative array of options or bitwise disjunction of flags. If the filter
     *                        accepts options, flags can be provided in the "flags" field of the array. For
     *                        the "callback" filter, a callable should be passed. The
     *                        callback must accept one argument, the value to be filtered, and return
     *                        the value after filtering/sanitizing it.
     *                        Only forwarded to PHP when the caller actually passed a 3rd argument.
     *                        </p>
     *                        <p>
     *                        <code>
     *                        // for filters that accept options, use this format
     *                        $options = array(
     *                        'options' => array(
     *                        'default' => 3, // value to return if the filter fails
     *                        'min_range' => 0
     *                        ),
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                        );
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                        // for filters that only accept flags, you can pass them directly
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                        // ... or as an array
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                        // callback filter: "foo" receives the value and returns the filtered value
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                        </code>
     *                        </p>
     *
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
     */
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // forward "$options" only if it was explicitly given
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1590
1591
    /**
     * "filter_var_array()"-wrapper which normalizes the result to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters multiple variables at once and then passes the result through "UTF8::filter()".
     *
     * @see  http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array $data       <p>
     *                          An array with string keys containing the data to filter.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a
     *                          filter type, or an array optionally specifying the filter, flags and options.
     *                          If the value is an array, valid keys are "filter" (the filter type),
     *                          "flags" (flags that apply to the filter) and "options" (options that
     *                          apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all values in
     *                          the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set
     */
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // with only "$data" given, let PHP apply its own defaults for the remaining arguments
        $values = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        return self::filter($values);
    }
1634
1635
    /**
     * Checks whether the "finfo" class is available on the server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $isAvailable = \class_exists('finfo');

        return $isAvailable;
    }
1645
1646
    /**
     * Returns the first $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start; values <= 0 yield an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0) {
            return '';
        }

        $head = self::substr($str, 0, $n, $encoding);

        // "UTF8::substr()" may signal failure with false; callers always get a string
        return $head === false ? '' : $head;
    }
1668
1669
    /**
     * Check that the number of unicode characters is not more than the specified integer.
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @return bool true if string is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1681
1682
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as if it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // search/replace lists are derived once per request from the "utf8_fix" data table
        static $brokenSequences = null;
        static $fixedSequences = null;

        if ($brokenSequences === null) {
            // lazily load the shared mapping (broken sequence => correct sequence)
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
            $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
        }

        $fixed = \str_replace($brokenSequences, $fixedSequences, $str);

        return $fixed;
    }
1715
1716
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * Arrays are processed recursively, element by element, with their keys preserved.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            return \array_map(
                static function ($element) {
                    return self::fix_utf8($element);
                },
                $str
            );
        }

        $fixed = (string) $str;
        $previous = '';

        // decode + re-encode until the string no longer changes (= every encoding layer is removed)
        while ($fixed !== $previous) {
            $previous = $fixed;
            $fixed = self::to_utf8(
                self::utf8_decode($previous, true)
            );
        }

        return $fixed;
    }
1746
1747
    /**
     * Get the text direction of a specific character.
     *
     * If the "intlChar" support flag is set, "IntlChar::charDirection()" is asked first; whenever it gives
     * no decisive answer (or is not available) a built-in table of right-to-left code points is used.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlDirection = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }
        }

        $codePoint = static::chr_to_decimal($char);

        // fast path: everything outside of the table's overall bounds is left-to-right
        if ($codePoint < 0x5be || $codePoint > 0x10b7f) {
            return 'LTR';
        }

        // isolated right-to-left code points
        $rtlSingles = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($codePoint, $rtlSingles, true)) {
            return 'RTL';
        }

        // inclusive [first, last] right-to-left code point ranges
        $rtlRanges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtlRanges as $range) {
            if ($codePoint >= $range[0] && $codePoint <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
1864
1865
    /**
     * Check for php-support.
     *
     * Triggers the support detection on first use.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($key === null) {
            return self::$SUPPORT;
        }

        // unknown keys yield null
        return self::$SUPPORT[$key] ?? null;
    }
1891
1892
    /**
     * Detect a file type from the first two bytes of the given content.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        $leadingBytes = self::substr_in_byte($str, 0, 2);
        if (self::strlen_in_byte($leadingBytes) !== 2) {
            return $fallback;
        }

        // the lookup key is the decimal value of byte 1 directly followed by the decimal value of byte 2
        $byteValues = \unpack('C2chars', $leadingBytes);
        $signature = (int) ($byteValues['chars1'] . $byteValues['chars2']);

        $knownSignatures = [
            3780   => ['ext' => 'pdf', 'mime' => 'application/pdf', 'type' => 'binary'],
            7790   => ['ext' => 'exe', 'mime' => 'application/octet-stream', 'type' => 'binary'],
            7784   => ['ext' => 'midi', 'mime' => 'audio/x-midi', 'type' => 'binary'],
            8075   => ['ext' => 'zip', 'mime' => 'application/zip', 'type' => 'binary'],
            8297   => ['ext' => 'rar', 'mime' => 'application/rar', 'type' => 'binary'],
            255216 => ['ext' => 'jpg', 'mime' => 'image/jpeg', 'type' => 'binary'],
            7173   => ['ext' => 'gif', 'mime' => 'image/gif', 'type' => 'binary'],
            6677   => ['ext' => 'bmp', 'mime' => 'image/bmp', 'type' => 'binary'],
            13780  => ['ext' => 'png', 'mime' => 'image/png', 'type' => 'binary'],
        ];

        return $knownSignatures[$signature] ?? $fallback;
    }
1990
1991
    /**
     * Build a random string by picking characters from the given character pool.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        $poolSize = self::strlen($possibleChars, $encoding);

        // nothing to pick from
        if ($poolSize === 0) {
            return '';
        }

        $lastIndex = $poolSize - 1;
        $result = '';

        for ($picked = 0; $picked < $length; ++$picked) {
            try {
                $position = \random_int(0, $lastIndex);
            } catch (\Exception $e) {
                // no usable source of randomness for "random_int()": fall back to the Mersenne Twister
                /** @noinspection RandomApiMigrationInspection */
                $position = \mt_rand(0, $lastIndex);
            }

            $result .= self::substr($possibleChars, $position, 1, $encoding);
        }

        return $result;
    }
2024
2025
    /**
     * Build a unique string from a random number, the session id, the remote / server address
     * (when present in $_SERVER), the optional extra entropy and "uniqid()".
     *
     * INFO: the md5-variant is a plain identifier, do not use it as a password hash or security token.
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // "random_int()" throws if no source of randomness is available; use the same
        // fallback as "UTF8::get_random_string()" instead of letting the exception escape
        try {
            $randInt = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $randInt = \mt_rand(0, \mt_getrandmax());
        }

        $uniqueHelper = $randInt .
                        \session_id() .
                        ($_SERVER['REMOTE_ADDR'] ?? '') .
                        ($_SERVER['SERVER_ADDR'] ?? '') .
                        $entropyExtra;

        $uniqueString = \uniqid($uniqueHelper, true);

        if ($md5) {
            $uniqueString = \md5($uniqueString . $uniqueHelper);
        }

        return $uniqueString;
    }
2047
2048
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @see        UTF8::string_has_bom()
     *
     * @param string $str
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2063
2064
    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains a lower case character
     */
    public static function has_lowercase(string $str): bool
    {
        // any prefix followed by at least one lower case character
        $pattern = '.*[[:lower:]]';

        return self::str_matches_pattern($str, $pattern);
    }
2075
2076
    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains an upper case character
     */
    public static function has_uppercase(string $str): bool
    {
        // any prefix followed by at least one upper case character
        $pattern = '.*[[:upper:]]';

        return self::str_matches_pattern($str, $pattern);
    }
2087
2088
    /**
     * Converts a hexadecimal-value into an UTF-8 character.
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $decimal = \hexdec($hexdec);

        return self::decimal_to_chr($decimal);
    }
2099
2100
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted forms (case-insensitive): "U+xxxx", "\uxxxx" or the bare hex digits, each with 4 to 6 digits.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Only real hex digits are allowed: other letters would make "intval()" stop
        // early and return a wrong code point (e.g. "zzzz" => 0) instead of a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2124
2125
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * @see UTF8::html_entity_decode()
     *
     * @param string $str
     * @param int    $flags
     * @param string $encoding
     *
     * @return string
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2140
2141
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // first code point that gets encoded: skip the ASCII range if ASCII chars should be kept
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // The convmap must consist of groups of exactly four integers
            // (start code, end code, offset, mask): PHP >= 8.0 throws a ValueError for
            // any other element count, older versions silently ignored surplus elements.
            return \mb_encode_numericentity(
                $str,
                [$startCode, 0xfffff, 0, 0xfffff],
                $encoding
            );
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function ($chr) use ($keepAsciiChars, $encoding) {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::split($str)
            )
        );
    }
2194
2195
    /**
2196
     * UTF-8 version of html_entity_decode()
2197
     *
2198
     * The reason we are not using html_entity_decode() by itself is because
2199
     * while it is not technically correct to leave out the semicolon
2200
     * at the end of an entity most browsers will still interpret the entity
2201
     * correctly. html_entity_decode() does not convert entities without
2202
     * semicolons, so we are left with our own little solution here. Bummer.
2203
     *
2204
     * Convert all HTML entities to their applicable characters
2205
     *
2206
     * INFO: opposite to UTF8::html_encode()
2207
     *
2208
     * @see http://php.net/manual/en/function.html-entity-decode.php
2209
     *
2210
     * @param string $str      <p>
2211
     *                         The input string.
2212
     *                         </p>
2213
     * @param int    $flags    [optional] <p>
2214
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2215
     *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2216
     *                         <table>
2217
     *                         Available <i>flags</i> constants
2218
     *                         <tr valign="top">
2219
     *                         <td>Constant Name</td>
2220
     *                         <td>Description</td>
2221
     *                         </tr>
2222
     *                         <tr valign="top">
2223
     *                         <td><b>ENT_COMPAT</b></td>
2224
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2225
     *                         </tr>
2226
     *                         <tr valign="top">
2227
     *                         <td><b>ENT_QUOTES</b></td>
2228
     *                         <td>Will convert both double and single quotes.</td>
2229
     *                         </tr>
2230
     *                         <tr valign="top">
2231
     *                         <td><b>ENT_NOQUOTES</b></td>
2232
     *                         <td>Will leave both double and single quotes unconverted.</td>
2233
     *                         </tr>
2234
     *                         <tr valign="top">
2235
     *                         <td><b>ENT_HTML401</b></td>
2236
     *                         <td>
2237
     *                         Handle code as HTML 4.01.
2238
     *                         </td>
2239
     *                         </tr>
2240
     *                         <tr valign="top">
2241
     *                         <td><b>ENT_XML1</b></td>
2242
     *                         <td>
2243
     *                         Handle code as XML 1.
2244
     *                         </td>
2245
     *                         </tr>
2246
     *                         <tr valign="top">
2247
     *                         <td><b>ENT_XHTML</b></td>
2248
     *                         <td>
2249
     *                         Handle code as XHTML.
2250
     *                         </td>
2251
     *                         </tr>
2252
     *                         <tr valign="top">
2253
     *                         <td><b>ENT_HTML5</b></td>
2254
     *                         <td>
2255
     *                         Handle code as HTML 5.
2256
     *                         </td>
2257
     *                         </tr>
2258
     *                         </table>
2259
     *                         </p>
2260
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2261
     *
2262
     * @return string the decoded string
2263
     */
2264 40
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // nothing to decode in an empty string
        if ($str === '') {
            return '';
        }

        // strings shorter than four bytes are returned untouched, examples: &; || &x;
        if (!isset($str[3])) {
            return $str;
        }

        // fast exit: no ampersand at all, or neither a numeric entity marker nor a semicolon
        if (
            \strpos($str, '&') === false
            ||
            (
                \strpos($str, '&#') === false
                &&
                \strpos($str, ';') === false
            )
        ) {
            return $str;
        }

        // make sure the extension detection has run *before* self::$SUPPORT is read below
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // no flags given: decode both quote styles and handle the input as HTML 5
        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // repeat until a pass changes nothing, so nested entities are decoded as well
        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                $str = \mb_decode_numericentity(
                    $str,
                    [0x80, 0xfffff, 0, 0xfffff, 0],
                    $encoding
                );
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    static function ($matches) use ($encoding) {
                        // always fallback via symfony polyfill
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

                        // quotes stay encoded here; the "$flags" decide about them below
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            // append the missing semicolon to numeric entities, so that \html_entity_decode() accepts them;
            // the cast keeps a failed preg_replace() (null) from being passed on
            $str_with_semicolons = (string) \preg_replace(
                '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
                '$1;',
                $str
            );

            // decode numeric & UTF16 two byte entities
            $str = \html_entity_decode(
                $str_with_semicolons,
                $flags,
                $encoding
            );
        } while ($str_compare !== $str);

        return $str;
    }
2347
2348
    /**
2349
     * Create an escaped HTML version of the string via "UTF8::htmlspecialchars()".
2350
     *
2351
     * @param string $str
2352
     * @param string $encoding [optional] <p>Default: UTF-8</p>
2353
     *
2354
     * @return string
2355
     */
2356 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // convert both quote styles and substitute invalid code unit sequences
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2364
2365
    /**
2366
     * Remove empty html-tag.
2367
     *
2368
     * e.g.: <tag></tag>
2369
     *
2370
     * @param string $str
2371
     *
2372
     * @return string
2373
     */
2374 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // an opening tag, optional whitespace and a closing tag
        $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

        $stripped = \preg_replace($emptyTagPattern, '', $str);

        return (string) $stripped;
    }
2382
2383
    /**
2384
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2385
     *
2386
     * @see http://php.net/manual/en/function.htmlentities.php
2387
     *
2388
     * @param string $str           <p>
2389
     *                              The input string.
2390
     *                              </p>
2391
     * @param int    $flags         [optional] <p>
2392
     *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2393
     *                              invalid code unit sequences and the used document type. The default is
2394
     *                              ENT_COMPAT | ENT_HTML401.
2395
     *                              <table>
2396
     *                              Available <i>flags</i> constants
2397
     *                              <tr valign="top">
2398
     *                              <td>Constant Name</td>
2399
     *                              <td>Description</td>
2400
     *                              </tr>
2401
     *                              <tr valign="top">
2402
     *                              <td><b>ENT_COMPAT</b></td>
2403
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2404
     *                              </tr>
2405
     *                              <tr valign="top">
2406
     *                              <td><b>ENT_QUOTES</b></td>
2407
     *                              <td>Will convert both double and single quotes.</td>
2408
     *                              </tr>
2409
     *                              <tr valign="top">
2410
     *                              <td><b>ENT_NOQUOTES</b></td>
2411
     *                              <td>Will leave both double and single quotes unconverted.</td>
2412
     *                              </tr>
2413
     *                              <tr valign="top">
2414
     *                              <td><b>ENT_IGNORE</b></td>
2415
     *                              <td>
2416
     *                              Silently discard invalid code unit sequences instead of returning
2417
     *                              an empty string. Using this flag is discouraged as it
2418
     *                              may have security implications.
2419
     *                              </td>
2420
     *                              </tr>
2421
     *                              <tr valign="top">
2422
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2423
     *                              <td>
2424
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2425
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2426
     *                              </td>
2427
     *                              </tr>
2428
     *                              <tr valign="top">
2429
     *                              <td><b>ENT_DISALLOWED</b></td>
2430
     *                              <td>
2431
     *                              Replace invalid code points for the given document type with a
2432
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2433
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2434
     *                              instance, to ensure the well-formedness of XML documents with
2435
     *                              embedded external content.
2436
     *                              </td>
2437
     *                              </tr>
2438
     *                              <tr valign="top">
2439
     *                              <td><b>ENT_HTML401</b></td>
2440
     *                              <td>
2441
     *                              Handle code as HTML 4.01.
2442
     *                              </td>
2443
     *                              </tr>
2444
     *                              <tr valign="top">
2445
     *                              <td><b>ENT_XML1</b></td>
2446
     *                              <td>
2447
     *                              Handle code as XML 1.
2448
     *                              </td>
2449
     *                              </tr>
2450
     *                              <tr valign="top">
2451
     *                              <td><b>ENT_XHTML</b></td>
2452
     *                              <td>
2453
     *                              Handle code as XHTML.
2454
     *                              </td>
2455
     *                              </tr>
2456
     *                              <tr valign="top">
2457
     *                              <td><b>ENT_HTML5</b></td>
2458
     *                              <td>
2459
     *                              Handle code as HTML 5.
2460
     *                              </td>
2461
     *                              </tr>
2462
     *                              </table>
2463
     *                              </p>
2464
     * @param string $encoding      [optional] <p>
2465
     *                              Like <b>htmlspecialchars</b>,
2466
     *                              <b>htmlentities</b> takes an optional third argument
2467
     *                              <i>encoding</i> which defines encoding used in
2468
     *                              conversion.
2469
     *                              Although this argument is technically optional, you are highly
2470
     *                              encouraged to specify the correct value for your code.
2471
     *                              </p>
2472
     * @param bool   $double_encode [optional] <p>
2473
     *                              When <i>double_encode</i> is turned off PHP will not
2474
     *                              encode existing html entities. The default is to convert everything.
2475
     *                              </p>
2476
     *
2477
     * @return string
2478
     *                <p>
2479
     *                The encoded string.
2480
     *                <br><br>
2481
     *                If the input <i>string</i> contains an invalid code unit
2482
     *                sequence within the given <i>encoding</i> an empty string
2483
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2484
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2485
     *                </p>
2486
     */
2487 9
    public static function htmlentities(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        $isCommonEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isCommonEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /*
         * \htmlentities() leaves backslashes untouched, so they are replaced
         * by their numeric html entity in a second step.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        // final pass via UTF8::html_encode(), ASCII chars are kept as they are
        return self::html_encode($encoded, true, $encoding);
    }
2507
2508
    /**
2509
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2510
     *
2511
     * INFO: Take a look at "UTF8::htmlentities()"
2512
     *
2513
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2514
     *
2515
     * @param string $str           <p>
2516
     *                              The string being converted.
2517
     *                              </p>
2518
     * @param int    $flags         [optional] <p>
2519
     *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2520
     *                              invalid code unit sequences and the used document type. The default is
2521
     *                              ENT_COMPAT | ENT_HTML401.
2522
     *                              <table>
2523
     *                              Available <i>flags</i> constants
2524
     *                              <tr valign="top">
2525
     *                              <td>Constant Name</td>
2526
     *                              <td>Description</td>
2527
     *                              </tr>
2528
     *                              <tr valign="top">
2529
     *                              <td><b>ENT_COMPAT</b></td>
2530
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2531
     *                              </tr>
2532
     *                              <tr valign="top">
2533
     *                              <td><b>ENT_QUOTES</b></td>
2534
     *                              <td>Will convert both double and single quotes.</td>
2535
     *                              </tr>
2536
     *                              <tr valign="top">
2537
     *                              <td><b>ENT_NOQUOTES</b></td>
2538
     *                              <td>Will leave both double and single quotes unconverted.</td>
2539
     *                              </tr>
2540
     *                              <tr valign="top">
2541
     *                              <td><b>ENT_IGNORE</b></td>
2542
     *                              <td>
2543
     *                              Silently discard invalid code unit sequences instead of returning
2544
     *                              an empty string. Using this flag is discouraged as it
2545
     *                              may have security implications.
2546
     *                              </td>
2547
     *                              </tr>
2548
     *                              <tr valign="top">
2549
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2550
     *                              <td>
2551
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2552
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2553
     *                              </td>
2554
     *                              </tr>
2555
     *                              <tr valign="top">
2556
     *                              <td><b>ENT_DISALLOWED</b></td>
2557
     *                              <td>
2558
     *                              Replace invalid code points for the given document type with a
2559
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2560
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2561
     *                              instance, to ensure the well-formedness of XML documents with
2562
     *                              embedded external content.
2563
     *                              </td>
2564
     *                              </tr>
2565
     *                              <tr valign="top">
2566
     *                              <td><b>ENT_HTML401</b></td>
2567
     *                              <td>
2568
     *                              Handle code as HTML 4.01.
2569
     *                              </td>
2570
     *                              </tr>
2571
     *                              <tr valign="top">
2572
     *                              <td><b>ENT_XML1</b></td>
2573
     *                              <td>
2574
     *                              Handle code as XML 1.
2575
     *                              </td>
2576
     *                              </tr>
2577
     *                              <tr valign="top">
2578
     *                              <td><b>ENT_XHTML</b></td>
2579
     *                              <td>
2580
     *                              Handle code as XHTML.
2581
     *                              </td>
2582
     *                              </tr>
2583
     *                              <tr valign="top">
2584
     *                              <td><b>ENT_HTML5</b></td>
2585
     *                              <td>
2586
     *                              Handle code as HTML 5.
2587
     *                              </td>
2588
     *                              </tr>
2589
     *                              </table>
2590
     *                              </p>
2591
     * @param string $encoding      [optional] <p>
2592
     *                              Defines encoding used in conversion.
2593
     *                              </p>
2594
     *                              <p>
2595
     *                              For the purposes of this function, the encodings
2596
     *                              ISO-8859-1, ISO-8859-15,
2597
     *                              UTF-8, cp866,
2598
     *                              cp1251, cp1252, and
2599
     *                              KOI8-R are effectively equivalent, provided the
2600
     *                              <i>string</i> itself is valid for the encoding, as
2601
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2602
     *                              the same positions in all of these encodings.
2603
     *                              </p>
2604
     * @param bool   $double_encode [optional] <p>
2605
     *                              When <i>double_encode</i> is turned off PHP will not
2606
     *                              encode existing html entities, the default is to convert everything.
2607
     *                              </p>
2608
     *
2609
     * @return string the converted string.
2610
     *                </p>
2611
     *                <p>
2612
     *                If the input <i>string</i> contains an invalid code unit
2613
     *                sequence within the given <i>encoding</i> an empty string
2614
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2615
     *                <b>ENT_SUBSTITUTE</b> flags are set
2616
     */
2617 8
    public static function htmlspecialchars(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
    {
        // "UTF-8" and "CP850" are passed through, everything else is normalized first
        $isCommonEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isCommonEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2625
2626
    /**
2627
     * Checks whether iconv is available on the server.
2628
     *
2629
     * @return bool
2630
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2631
     */
2632
    public static function iconv_loaded(): bool
    {
        // ask PHP whether the "iconv" extension is loaded
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2636
2637
    /**
2638
     * alias for "UTF8::decimal_to_chr()"
2639
     *
2640
     * @see UTF8::decimal_to_chr()
2641
     *
2642
     * @param mixed $int
2643
     *
2644
     * @return string
2645
     */
2646 4
    public static function int_to_chr($int): string
    {
        // alias: the conversion is done by UTF8::decimal_to_chr()
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2650
2651
    /**
2652
     * Converts Integer to hexadecimal U+xxxx code point representation.
2653
     *
2654
     * INFO: opposite to UTF8::hex_to_int()
2655
     *
2656
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2657
     * @param string $pfix [optional]
2658
     *
2659
     * @return string the code point, or empty string on failure
2660
     */
2661 6
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // left-pad the hexadecimal digits with zeros up to a width of four
        $paddedHex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $paddedHex;
    }
2669
2670
    /**
2671
     * Checks whether intl-char is available on the server.
2672
     *
2673
     * @return bool
2674
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2675
     */
2676
    public static function intlChar_loaded(): bool
    {
        // the check is done via the existence of the "IntlChar" class
        $hasIntlChar = \class_exists('IntlChar');

        return $hasIntlChar;
    }
2680
2681
    /**
2682
     * Checks whether intl is available on the server.
2683
     *
2684
     * @return bool
2685
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2686
     */
2687 5
    public static function intl_loaded(): bool
    {
        // ask PHP whether the "intl" extension is loaded
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2691
2692
    /**
2693
     * alias for "UTF8::is_ascii()"
2694
     *
2695
     * @see        UTF8::is_ascii()
2696
     *
2697
     * @param string $str
2698
     *
2699
     * @return bool
2700
     *
2701
     * @deprecated <p>use "UTF8::is_ascii()"</p>
2702
     */
2703 2
    public static function isAscii(string $str): bool
    {
        // deprecated alias: the check is done by UTF8::is_ascii()
        $isAscii = self::is_ascii($str);

        return $isAscii;
    }
2707
2708
    /**
2709
     * alias for "UTF8::is_base64()"
2710
     *
2711
     * @see        UTF8::is_base64()
2712
     *
2713
     * @param string $str
2714
     *
2715
     * @return bool
2716
     *
2717
     * @deprecated <p>use "UTF8::is_base64()"</p>
2718
     */
2719 2
    public static function isBase64($str): bool
    {
        // deprecated alias: the check is done by UTF8::is_base64()
        $isBase64 = self::is_base64($str);

        return $isBase64;
    }
2723
2724
    /**
2725
     * alias for "UTF8::is_binary()"
2726
     *
2727
     * @see        UTF8::is_binary()
2728
     *
2729
     * @param mixed $str
2730
     * @param bool  $strict
2731
     *
2732
     * @return bool
2733
     *
2734
     * @deprecated <p>use "UTF8::is_binary()"</p>
2735
     */
2736 4
    public static function isBinary($str, $strict = false): bool
    {
        // deprecated alias: the check is done by UTF8::is_binary()
        $isBinary = self::is_binary($str, $strict);

        return $isBinary;
    }
2740
2741
    /**
2742
     * alias for "UTF8::is_bom()"
2743
     *
2744
     * @see        UTF8::is_bom()
2745
     *
2746
     * @param string $utf8_chr
2747
     *
2748
     * @return bool
2749
     *
2750
     * @deprecated <p>use "UTF8::is_bom()"</p>
2751
     */
2752 2
    public static function isBom(string $utf8_chr): bool
    {
        // deprecated alias: the check is done by UTF8::is_bom()
        $isBom = self::is_bom($utf8_chr);

        return $isBom;
    }
2756
2757
    /**
2758
     * alias for "UTF8::is_html()"
2759
     *
2760
     * @see        UTF8::is_html()
2761
     *
2762
     * @param string $str
2763
     *
2764
     * @return bool
2765
     *
2766
     * @deprecated <p>use "UTF8::is_html()"</p>
2767
     */
2768 2
    public static function isHtml(string $str): bool
    {
        // deprecated alias: the check is done by UTF8::is_html()
        $isHtml = self::is_html($str);

        return $isHtml;
    }
2772
2773
    /**
2774
     * alias for "UTF8::is_json()"
2775
     *
2776
     * @see        UTF8::is_json()
2777
     *
2778
     * @param string $str
2779
     *
2780
     * @return bool
2781
     *
2782
     * @deprecated <p>use "UTF8::is_json()"</p>
2783
     */
2784
    public static function isJson(string $str): bool
    {
        // deprecated alias: the check is done by UTF8::is_json()
        $isJson = self::is_json($str);

        return $isJson;
    }
2788
2789
    /**
2790
     * alias for "UTF8::is_utf16()"
2791
     *
2792
     * @see        UTF8::is_utf16()
2793
     *
2794
     * @param mixed $str
2795
     *
2796
     * @return false|int
2797
     *                   <strong>false</strong> if it's not UTF-16,<br>
2798
     *                   <strong>1</strong> for UTF-16LE,<br>
2799
     *                   <strong>2</strong> for UTF-16BE
2800
     *
2801
     * @deprecated <p>use "UTF8::is_utf16()"</p>
2802
     */
2803 2
    public static function isUtf16($str)
    {
        // deprecated alias: the result of UTF8::is_utf16() is passed through unchanged
        $utf16Result = self::is_utf16($str);

        return $utf16Result;
    }
2807
2808
    /**
2809
     * alias for "UTF8::is_utf32()"
2810
     *
2811
     * @see        UTF8::is_utf32()
2812
     *
2813
     * @param mixed $str
2814
     *
2815
     * @return false|int
2816
     *                   <strong>false</strong> if it's not UTF-32,
2817
     *                   <strong>1</strong> for UTF-32LE,
2818
     *                   <strong>2</strong> for UTF-32BE
2819
     *
2820
     * @deprecated <p>use "UTF8::is_utf32()"</p>
2821
     */
2822 2
    public static function isUtf32($str)
    {
        // deprecated alias: the result of UTF8::is_utf32() is passed through unchanged
        $utf32Result = self::is_utf32($str);

        return $utf32Result;
    }
2826
2827
    /**
2828
     * alias for "UTF8::is_utf8()"
2829
     *
2830
     * @see        UTF8::is_utf8()
2831
     *
2832
     * @param string $str
2833
     * @param bool   $strict
2834
     *
2835
     * @return bool
2836
     *
2837
     * @deprecated <p>use "UTF8::is_utf8()"</p>
2838
     */
2839 17
    public static function isUtf8($str, $strict = false): bool
    {
        // deprecated alias: the check is done by UTF8::is_utf8()
        $isUtf8 = self::is_utf8($str, $strict);

        return $isUtf8;
    }
2843
2844
    /**
2845
     * Returns true if the string contains only alphabetic chars, false otherwise.
2846
     *
2847
     * @param string $str
2848
     *
2849
     * @return bool
2850
     *              Whether or not $str contains only alphabetic chars
2851
     */
2852 10
    public static function is_alpha(string $str): bool
    {
        // zero or more chars of the POSIX class "alpha", anchored at both ends
        $alphaPattern = '^[[:alpha:]]*$';

        return self::str_matches_pattern($str, $alphaPattern);
    }
2856
2857
    /**
2858
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
2859
     *
2860
     * @param string $str
2861
     *
2862
     * @return bool
2863
     *              Whether or not $str contains only alphanumeric chars
2864
     */
2865 13
    public static function is_alphanumeric(string $str): bool
    {
        // zero or more chars of the POSIX class "alnum", anchored at both ends
        $alnumPattern = '^[[:alnum:]]*$';

        return self::str_matches_pattern($str, $alnumPattern);
    }
2869
2870
    /**
2871
     * Checks if a string is 7 bit ASCII.
2872
     *
2873
     * @param string $str <p>The string to check.</p>
2874
     *
2875
     * @return bool
2876
     *              <strong>true</strong> if it is ASCII<br>
2877
     *              <strong>false</strong> otherwise
2878
     */
2879 201
    public static function is_ascii(string $str): bool
    {
        // Accepted bytes: 0x09, 0x0A, 0x0D, 0x10, 0x13 and the range 0x20-0x7E,
        // any other byte makes the check fail.
        // NOTE(review): 0x10 / 0x13 were possibly meant as decimal 10 / 13 -- confirm.
        return $str === ''
               || \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str) !== 1;
    }
2887
2888
    /**
2889
     * Returns true if the string is base64 encoded, false otherwise.
2890
     *
2891
     * @param string $str <p>The input string.</p>
2892
     *
2893
     * @return bool whether or not $str is base64 encoded
2894
     */
2895 9
    public static function is_base64($str): bool
    {
        // only non-empty strings can be base64 encoded
        if (\is_string($str) === false || $str === '') {
            return false;
        }

        // strict mode: returns false if the input contains chars outside of the base64 alphabet
        $decoded = \base64_decode($str, true);

        // compare against false explicitly: a truthiness check would reject
        // valid input that decodes to the string "0" (e.g. "MA==")
        return $decoded !== false && \base64_encode($decoded) === $str;
    }
2909
2910
    /**
2911
     * Check if the input is binary... (this looks like a hack).
2912
     *
2913
     * @param mixed $input
2914
     * @param bool  $strict
2915
     *
2916
     * @return bool
2917
     */
2918 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        // a string that consists only of "0" and "1" is reported as binary
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        $fileType = self::get_file_type($data);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        $byteLength = self::strlen_in_byte($data);
        if ($byteLength) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            // a NUL byte ratio above 0.256 is reported as binary
            $nullByteCount = self::substr_count_in_byte($data, "\x0", 0, $byteLength);
            if (($nullByteCount / $byteLength) > 0.256) {
                return true;
            }
        }

        // the finfo based check below only runs in strict mode
        if ($strict !== true) {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncodingDetector = new \finfo(\FILEINFO_MIME_ENCODING);

        return $mimeEncodingDetector->buffer($data) === 'binary';
    }
2965
2966
    /**
     * Check if the file is binary.
     *
     * Only the first 512 bytes of the file are read; they are handed to
     * "UTF8::is_binary()" in strict mode.
     *
     * @param string $file <p>Path of the file to inspect.</p>
     *
     * @return bool
     *              <strong>false</strong> is also returned when nothing could be read from the file
     */
    public static function is_binary_file($file): bool
    {
        $head = '';

        $handle = \fopen($file, 'rb');
        if (\is_resource($handle)) {
            // the leading 512 bytes are the whole sample
            $head = \fread($handle, 512);
            \fclose($handle);
        }

        return $head === '' ? false : self::is_binary($head, true);
    }
2990
2991
    /**
     * Tell whether the string is made up of whitespace characters only.
     *
     * The decision is delegated to "UTF8::str_matches_pattern()" with the
     * POSIX class "[[:space:]]"; the pattern uses "*", so it is written to
     * accept zero characters as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $whitespaceOnly = '^[[:space:]]*$';

        return self::str_matches_pattern($str, $whitespaceOnly);
    }
3003
3004
    /**
     * Checks whether the given string is exactly one of the known "Byte Order Marks".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you want to look for a BOM inside a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // the BOM byte sequences are the keys of self::$BOM, the values are not needed here
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3024
3025
    /**
     * Determine whether the given value is considered to be empty.
     *
     * The value is empty when it evaluates to FALSE in a boolean context,
     * which is what PHP's empty() reports for an existing variable
     * (so "", "0", 0, null and [] are all empty).
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // $str always exists here, so empty($str) boils down to a plain negation
        return !$str;
    }
3039
3040
    /**
     * Tell whether the string is made up of hexadecimal characters only.
     *
     * The decision is delegated to "UTF8::str_matches_pattern()" with the
     * POSIX class "[[:xdigit:]]".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $hexOnly = '^[[:xdigit:]]*$';

        return self::str_matches_pattern($str, $hexOnly);
    }
3052
3053
    /**
     * Check if the string contains any html-tags, e.g. "<lall>".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if at least one tag was found, <strong>false</strong> otherwise
     *              (an empty string never contains a tag)
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // opening, closing or self-closing tag, optionally carrying attributes
        $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

        return \preg_match($tagPattern, $str) === 1;
    }
3073
3074
    /**
     * Try to check if "$str" is a json-string.
     *
     * Only input that decodes (via "UTF8::json_decode()") to an object or an
     * array without a JSON error is accepted; an empty string is rejected
     * right away.
     *
     * @param string $str <p>The input string.</p>
     *
     * @throws \RuntimeException if ext-json is reported as missing
     *
     * @return bool
     */
    public static function is_json(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // anything that is neither an object nor an array does not count as JSON here
        if (\is_object($decoded) === false && \is_array($decoded) === false) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3106
3107
    /**
     * Tell whether the string is made up of lower case characters only.
     *
     * The decision is delegated to "UTF8::str_matches_pattern()" with the
     * POSIX class "[[:lower:]]".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        $lowerOnly = '^[[:lower:]]*$';

        return self::str_matches_pattern($str, $lowerOnly);
    }
3116
3117
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is probed with unserialize(), but object instantiation is
     * disabled ("allowed_classes" => false), so probing untrusted input
     * cannot create objects of real classes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is a serialized boolean false, which the probe below
        // could not tell apart from a failed unserialize() call
        if ($str === 'b:0;') {
            return true;
        }

        // the silence operator is deliberate: invalid input only has to yield false
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3136
3137
    /**
     * Tell whether the string is made up of upper case characters only.
     *
     * The decision is delegated to "UTF8::str_matches_pattern()" with the
     * POSIX class "[[:upper:]]".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        $upperOnly = '^[[:upper:]]*$';

        return self::str_matches_pattern($str, $upperOnly);
    }
3150
3151
    /**
     * Check if the string is UTF-16.
     *
     * Both byte orders are tried: the input is converted to UTF-8, round-tripped
     * through the candidate encoding, and - if the round trip is stable - the
     * characters of the result are scored against the input. The byte order
     * with the higher score wins; equal scores mean "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>If true, input that "UTF8::is_binary()" (strict mode) does not
     *                                     report as binary is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        $str = (string) $str;

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        // self::$SUPPORT may still be unset here, e.g. when the binary check was skipped
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // character statistics of the input, computed lazily and shared by both passes
        $strChars = [];
        $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$asUtf8) {
                continue;
            }

            // a real UTF-16 string survives UTF-8 -> UTF-16 -> UTF-8 unchanged
            $roundTrip = \mb_convert_encoding(
                \mb_convert_encoding($asUtf8, $encoding, 'UTF-8'),
                'UTF-8',
                $encoding
            );
            if ($roundTrip !== $asUtf8) {
                continue;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true);
            }

            // NOTE(review): this compares the keys of count_chars($roundTrip) with the
            // values of $strChars - confirm against count_chars() that this is intended
            foreach (self::count_chars($roundTrip, true) as $char => $unused) {
                if (\in_array($char, $strChars, true) === true) {
                    ++$hits[$encoding];
                }
            }
        }

        if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
            return false;
        }

        return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
    }
3226
3227
    /**
     * Check if the string is UTF-32.
     *
     * Both byte orders are tried: the input is converted to UTF-8, round-tripped
     * through the candidate encoding, and - if the round trip is stable - the
     * characters of the result are scored against the input. The byte order
     * with the higher score wins; equal scores mean "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>If true, input that "UTF8::is_binary()" (strict mode) does not
     *                                     report as binary is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        $str = (string) $str;

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        // self::$SUPPORT may still be unset here, e.g. when the binary check was skipped
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // character statistics of the input, computed lazily and shared by both passes
        $strChars = [];
        $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$asUtf8) {
                continue;
            }

            // a real UTF-32 string survives UTF-8 -> UTF-32 -> UTF-8 unchanged
            $roundTrip = \mb_convert_encoding(
                \mb_convert_encoding($asUtf8, $encoding, 'UTF-8'),
                'UTF-8',
                $encoding
            );
            if ($roundTrip !== $asUtf8) {
                continue;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true);
            }

            // NOTE(review): this compares the keys of count_chars($roundTrip) with the
            // values of $strChars - confirm against count_chars() that this is intended
            foreach (self::count_chars($roundTrip, true) as $char => $unused) {
                if (\in_array($char, $strChars, true) === true) {
                    ++$hits[$encoding];
                }
            }
        }

        if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
            return false;
        }

        return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
    }
3302
3303
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * Arrays are checked element by element (recursively); every other value is
     * cast to string once, so the byte loop always works on a real string.
     * A string that ends in the middle of a multi-octet sequence is rejected.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // cast once: the loop below indexes single bytes, which only works on strings
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            // UTF-16 / UTF-32 payloads are only considered when the input looks binary
            if (
                self::is_binary($str, true)
                &&
                (
                    self::is_utf16($str, false) !== false
                    ||
                    self::is_utf32($str, false) !== false
                )
            ) {
                return false;
            }
        }

        // NOTE(review): the PCRE shortcut is taken when pcre_utf8_support() is *not* true,
        // which looks inverted - confirm against pcre_utf8_support() before changing it
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // number of octets still expected to complete the current sequence
        $mUcs4 = 0; // code point assembled so far
        $mBytes = 1; // total number of octets of the current sequence

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = self::strlen_in_byte($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // Start of a character: either US-ASCII or the first octet
                // of a multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    // First octet of 5 octet sequence: always illegal (not the
                    // shortest form or outside 0-0x10FFFF). Rather than trying to
                    // resynchronize, carry on until the end of the sequence and
                    // let the "$mBytes > 4" check below reject it.
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            // Inside a multi-octet sequence only continuation octets (10xxxxxx) are legal.
            if ((0xC0 & $in) !== 0x80) {
                return false;
            }

            // Merge the 6 payload bits of this continuation octet into the code point.
            $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

            if (--$mState === 0) {
                // End of the multi-octet sequence: $mUcs4 now holds the final code point.
                if (
                    // From Unicode 3.1, non-shortest form is illegal.
                    ($mBytes === 2 && $mUcs4 < 0x0080)
                    ||
                    ($mBytes === 3 && $mUcs4 < 0x0800)
                    ||
                    ($mBytes === 4 && $mUcs4 < 0x10000)
                    ||
                    ($mBytes > 4)
                    ||
                    // From Unicode 3.2, surrogate characters are illegal.
                    (($mUcs4 & 0xFFFFF800) === 0xD800)
                    ||
                    // Code points outside the Unicode range are illegal.
                    ($mUcs4 > 0x10FFFF)
                ) {
                    return false;
                }

                // reset the decoder for the next character
                $mState = 0;
                $mUcs4 = 0;
                $mBytes = 1;
            }
        }

        // a non-zero state means the string stops in the middle of a multi-octet sequence
        return $mState === 0;
    }
3462
3463
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Decodes a JSON string.
     *
     * The input is passed through "UTF8::filter()" first and then handed to
     * PHP's native json_decode().
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded; the native function
     *                        only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of JSON - it will also encode and decode
     *                        scalar types and <b>NULL</b>. The JSON standard only supports these
     *                        values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, e.g. <b>JSON_BIGINT_AS_STRING</b>
     *                        (default is to cast large integers as floats).
     *                        </p>
     *
     * @throws \RuntimeException if ext-json is reported as missing
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
    {
        // filtering happens before the extension check
        $filtered = self::filter($json);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filtered, $assoc, $depth, $options);

        return $decoded;
    }
3513
3514
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Returns the JSON representation of a value.
     *
     * The value is passed through "UTF8::filter()" first and then handed to
     * PHP's native json_encode().
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource; the native function expects all string data
     *                       to be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of JSON - it will also encode and decode
     *                       scalar types and <b>NULL</b>. The JSON standard only supports these
     *                       values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if ext-json is reported as missing
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // filtering happens before the extension check
        $filtered = self::filter($value);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filtered, $options, $depth);

        return $encoded;
    }
3567
3568
    /**
     * Checks whether JSON is available on the server.
     *
     * Availability is derived from the existence of the json_decode() function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $hasJsonDecode = \function_exists('json_decode');

        return $hasJsonDecode;
    }
3578
3579
    /**
     * Makes string's first char lowercase.
     *
     * The string is split into its first character and the remainder; only
     * the first character is sent through "UTF8::strtolower()".
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // everything after the first character stays untouched
        $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);

        $head = (string) self::substr($str, 0, 1, $encoding, $cleanUtf8);
        $loweredHead = self::strtolower($head, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        // substr() signals "no remainder" with false
        return $loweredHead . ($rest === false ? '' : $rest);
    }
3607
3608
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
3630
3631
    /**
     * Lowercase the first character of all words in the string.
     *
     * The string is split via "UTF8::str_to_words()"; every non-empty part
     * that is not listed in $exceptions is passed to "UTF8::lcfirst()" and the
     * parts are glued together again. The string "0" and words equal to "0"
     * are kept (only real empty strings are skipped).
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words.</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // compare against '' explicitly: a plain truthiness test would also drop "0"
        if ($str === '') {
            return '';
        }

        $useExceptions = $exceptions !== [];
        $result = '';

        foreach (self::str_to_words($str, $charlist) as $word) {
            if ((string) $word === '') {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }

            $result .= $word;
        }

        return $result;
    }
3685
3686
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
3708
3709
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * Without a character list the Unicode categories "Z" (separators) and
     * "C" (control / other) are stripped. A character list of "0" (or 0) is
     * honoured as the character "0"; every other falsy value selects the default.
     *
     * @param string $str   <p>The string to be trimmed</p>
     * @param mixed  $chars <p>Optional characters to be stripped</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // "0" is falsy in PHP, but it is a perfectly valid character to strip
        $useDefaultChars = $chars === \INF
                           ||
                           (!$chars && $chars !== '0' && $chars !== 0);

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($useDefaultChars) {
            $pattern = "^[\pZ\pC]+";
        } else {
            // quote for use inside a character class with "/" as delimiter
            $quotedChars = \preg_quote((string) $chars, '/');
            $pattern = "^[{$quotedChars}]+";
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3733
3734
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string before the code
     * points are collected via "UTF8::codepoints()".
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null if no code points were found
     */
    public static function max($arg)
    {
        $text = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($text, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\max($codepoints));
    }
3756
3757
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * The per-character sizes come from "UTF8::chr_size_list()".
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte length of the given chars, 0 if the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return \count($sizes) > 0 ? (int) \max($sizes) : 0;
    }
3774
3775
    /**
     * Checks whether mbstring is available on the server.
     *
     * Side effect: when the extension is loaded, the internal mbstring encoding is set to "UTF-8".
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        if (\extension_loaded('mbstring') === false) {
            return false;
        }

        \mb_internal_encoding('UTF-8');

        return true;
    }
3790
3791
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings
     *                   (array elements are concatenated before the lookup).</p>
     *
     * @return string|null the character with the lowest code point, null on failure or empty input
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codePointList = self::codepoints($input, false);

        return \count($codePointList) === 0
            ? null
            : self::chr(\min($codePointList));
    }
3813
3814
    /**
     * Alias for "UTF8::normalize_encoding()"; both arguments are passed through unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
3830
3831
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order: empty / boolean-like input -> $fallback, a few hard-wired names,
     * the per-request cache, the list of known encodings, and finally a table of
     * spelling variants. A name that matches nothing is returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // '' and '0' are falsy; '1' is what "true" turns into.
        // This is only a fallback for non "strict_types" usage.
        if (!$encoding || $encoding === '1') {
            return $fallback;
        }

        // frequently used names, resolved without touching the cache or the data files
        $wellKnown = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
        ];
        if (isset($wellKnown[$encoding])) {
            return $wellKnown[$encoding];
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // already a known name: keep it as it is
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;
        }

        $upperCased = \strtoupper($encoding);
        // lookup key: upper-cased name without punctuation (whitespace is kept)
        $lookupKey = \preg_replace('/[^a-zA-Z0-9\s]/', '', $upperCased);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown names fall through as their upper-cased form
        $normalized = !empty($equivalences[$lookupKey]) ? $equivalences[$lookupKey] : $upperCased;

        // the cache is keyed by the name exactly as it was passed in
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
3972
3973
    /**
     * Standardize line endings to unix-like ("\n").
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string with every "\r\n" and every single "\r" replaced by "\n"
     */
    public static function normalize_line_ending(string $str): string
    {
        // "\r\n" has to be listed first, otherwise a Windows line break would end up as "\n\n"
        $foreignLineBreaks = ["\r\n", "\r"];

        return (string) \str_replace($foreignLineBreaks, "\n", $str);
    }
3984
3985
    /**
     * Normalize some MS Word special characters.
     *
     * The search / replace lists are taken from the "utf8_msword" data set and are
     * built only once per request.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        static $SEARCH_CACHE = null;
        static $REPLACE_CACHE = null;

        if ($str === '') {
            return '';
        }

        if ($SEARCH_CACHE === null) {
            // lazy-load the mapping table
            if (self::$UTF8_MSWORD === null) {
                self::$UTF8_MSWORD = self::getData('utf8_msword');
            }

            $SEARCH_CACHE = \array_keys(self::$UTF8_MSWORD);
            $REPLACE_CACHE = \array_values(self::$UTF8_MSWORD);
        }

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
4012
4013
    /**
     * Normalize the whitespace: every character of the whitespace table is replaced by a plain space.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars. Otherwise they are removed.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        if ($str === '') {
            return '';
        }

        // one cached search list per variant: 0 = replace NBSP as well, 1 = keep NBSP
        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            // bidi control characters are dropped, not turned into spaces
            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4054
4055
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Results that had to be computed (via "IntlChar" or by decoding the bytes manually)
     * are cached per character + encoding.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the cache key uses the character exactly as it was passed in (before any re-encoding)
        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // lazy-load the pre-computed lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return self::$ORD[$chr];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            // a falsy result (null / 0) falls through to the manual decoding below
            if ($intlCode) {
                return $CHAR_CACHE[$cacheKey] = $intlCode;
            }
        }

        // manual decoding: look at (up to) the first four bytes; unpack() yields a 1-based array
        $bytes = \unpack('C*', (string) self::substr($chr, 0, 4, 'CP850'));
        $leadByte = $bytes ? $bytes[1] : 0;

        if ($leadByte >= 0xF0 && isset($bytes[4])) {
            // 4-byte sequence
            $code = (($leadByte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($leadByte >= 0xE0 && isset($bytes[3])) {
            // 3-byte sequence
            $code = (($leadByte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($leadByte >= 0xC0 && isset($bytes[2])) {
            // 2-byte sequence
            $code = (($leadByte - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // single byte (or nothing at all)
            $code = $leadByte;
        }

        return $CHAR_CACHE[$cacheKey] = (int) $code;
    }
4132
4133
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike the native "parse_str()", this method never (re-)places variables
     *          in the current scope; the result is always written into the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: use the native parser, success is judged by the result only
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return !empty($result);
        }

        $parsed = \mb_parse_str($str, $result);

        return !($parsed === false || empty($result));
    }
4169
4170
    /**
     * Checks if the "u" pattern modifier is available, which enables Unicode support in PCRE.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // probe with an empty pattern; warnings are silenced on purpose
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probeResult = @\preg_match('//u', '');

        return (bool) $probeResult;
    }
4182
4183
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is interpreted in this order: decimal digits only -> decimal code point,
     * hexadecimal digits only -> hexadecimal code point, anything else -> code point of the
     * given character. Note that a value such as "a" therefore counts as hexadecimal (0x0A).
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if "ext-ctype" is not installed
     *
     * @return string[] empty if one of the boundaries is empty or resolves to code point 0
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // Resolves one boundary into a code point. The value is cast to string before it
        // is handed to the ctype functions, because they only accept strings reliably.
        $toCodePoint = static function ($boundary): int {
            $asString = (string) $boundary;

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit($asString)) {
                return (int) $asString;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($asString)) {
                return (int) self::hex_to_int($asString);
            }

            return self::ord($boundary);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        return \array_map(
            [
                self::class,
                'chr',
            ],
            \range($start, $end)
        );
    }
4239
4240
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // "%uXXXX" escapes are rewritten into hexadecimal HTML entities,
        // so that the entity decoding below takes care of them
        $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($unicodeEscapePattern, $str)) {
            $str = (string) \preg_replace($unicodeEscapePattern, '&#x\\1;', \rawurldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // decode until a pass changes nothing anymore (or only once without "$multi_decode")
        do {
            $beforePass = $str;

            $asUtf8 = self::to_utf8($str);
            $withoutEntities = self::html_entity_decode($asUtf8, $flags);
            $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));
        } while ($multi_decode === true && $beforePass !== $str);

        return $str;
    }
4287
4288
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is passed without delimiters; the "u" (UTF-8) modifier is always added.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
    {
        // "msr" is mapped onto "ms" before it is handed to preg_replace()
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4316
4317
    /**
     * Alias for "UTF8::remove_bom()"; the argument is passed through unchanged.
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4332
4333
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table is checked against the start of the string,
     * one after another, and stripped when it matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = self::strlen_in_byte($str);

        foreach (self::$BOM as $bomString => $bomByteLength) {
            // only a BOM at the very beginning is of interest
            if (self::strpos_in_byte($str, $bomString, 0) !== 0) {
                continue;
            }

            $stripped = self::substr_in_byte($str, $bomByteLength, $remainingBytes);
            if ($stripped === false) {
                return '';
            }

            $remainingBytes -= $bomByteLength;
            $str = (string) $stripped;
        }

        return $str;
    }
4362
4363
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated occurrences of an item is collapsed into a single one,
     * e.g. "a   b" with the item " " becomes "a b". The matching is case-sensitive.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // anything that is neither a string nor an array is ignored
        if (\is_array($what) === false) {
            return $str;
        }

        foreach ($what as $item) {
            $item = (string) $item;

            // an empty needle cannot be duplicated
            if ($item === '') {
                continue;
            }

            // A callback is used instead of a replacement string, because a needle such as
            // "$0" or "\1" would otherwise be expanded as a back-reference.
            $str = (string) \preg_replace_callback(
                '/(?:' . \preg_quote($item, '/') . ')+/',
                static function (array $match) use ($item): string {
                    return $item;
                },
                $str
            );
        }

        return $str;
    }
4386
4387
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str
     * @param string $allowableTags [optional] <p>You can use the optional second parameter to specify tags which should
     *                              not be stripped. Default: '' (strip every tag)
     *                              </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4401
4402
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced by a single $replacement.
     *
     * @param string $str
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // i = case-insensitive ("<BR>"), s = dot matches newlines inside a tag,
        // U = ungreedy, so that "<br ...>" stops at the first ">"
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4414
4415
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09) are kept.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str
     * @param bool   $url_encoded [optional] <p>Also remove the url encoded form ("%00", "%1F", ...) of these chars.</p>
     * @param string $replacement [optional] <p>The string that is inserted instead of the removed chars.</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // init
        $non_displayables = [];

        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        if ($url_encoded) {
            // percent-encoding is case-insensitive ("%0b" === "%0B"), so the patterns must be as well
            $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
            $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
            $non_displayables[] = '/%7f/i'; // url encoded 127
        }

        $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        // repeat until nothing is left, a removal can bring a new sequence together (e.g. "%0%000")
        do {
            $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
        } while ($count !== 0);

        return $str;
    }
4448
4449
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        if (!self::str_starts_with($str, $substring)) {
            return $str;
        }

        // strlen() may report "false"; substr() needs a real integer offset
        $prefixLength = (int) self::strlen($substring, $encoding);

        return (string) self::substr(
            $str,
            $prefixLength,
            null,
            $encoding
        );
    }
4471
4472
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        if (!self::str_ends_with($str, $substring)) {
            return $str;
        }

        // strlen() may report "false", so both lengths are forced to integers
        $keepLength = (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding);

        // the lengths were measured in $encoding, so the cut has to use the same encoding
        return (string) self::substr(
            $str,
            0,
            $keepLength,
            $encoding
        );
    }
4493
4494
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Delegates to "UTF8::str_replace()" or, for the case-insensitive mode, to "UTF8::str_ireplace()".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
    {
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4512
4513
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * Delegates to "UTF8::str_replace()" or, for the case-insensitive mode, to "UTF8::str_ireplace()".
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
    {
        return $caseSensitive
            ? self::str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4531
4532
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character(s). Default: '' (remove)</p>
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars as well</p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // "mb_substitute_character()" accepts only a code point or "none" / "long" / "entity";
            // an arbitrary replacement string is rejected. Invalid sequences are therefore
            // turned into U+FFFD first, and the final str_replace() below swaps U+FFFD for the
            // requested replacement (or removes it, if the replacement is empty).
            //
            // always fallback via symfony polyfill
            $save = \mb_substitute_character();

            try {
                \mb_substitute_character(0xFFFD);
                $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // never leave the global mbstring setting modified
                \mb_substitute_character($save);
            }

            $str = \is_string($strTmp) ? $strTmp : '';
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacementChar,
                $replacementChar,
            ],
            $str
        );
    }
4584
4585
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string $str   <p>The string to be trimmed.</p>
     * @param mixed  $chars <p>Optional characters to be stripped. If it is not given (INF, null, false or ''),
     *                      Unicode separator and control characters are stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // Only the "not given" markers (INF, null, false, '') select the default
        // character class. A plain truthiness check would also throw away valid
        // character lists such as '0'.
        $charList = (\is_scalar($chars) && $chars !== \INF && $chars !== false) ? (string) $chars : '';

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        if ($charList === '') {
            // default: Unicode "separator" (\pZ) and "other" (\pC) characters
            $pattern = '[\pZ\pC]+$';
        } else {
            // quote the list so that it is matched literally inside the character class
            $pattern = '[' . \preg_quote($charList, '/') . ']+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4609
4610
    /**
     * WARNING: Prints the detected native UTF-8 support (libs) as HTML, e.g. for debugging.
     *
     * Echoes one "key - value" line per entry of the internal support map, wrapped in "<pre>".
     */
    public static function showSupport()
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // Collect all lines first, then emit the whole report at once.
        $report = '';
        foreach (self::$SUPPORT as $feature => $state) {
            $report .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }

        echo '<pre>' . $report . '</pre>';
    }
4625
4626
    /**
     * Converts a UTF-8 character to an HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity, the unchanged char (ASCII + $keepAsciiChars)
     *                or an empty string for empty input
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        if ($keepAsciiChars === true && self::is_ascii($char) === true) {
            return $char;
        }

        // "UTF-8" and "CP850" are passed through as-is, everything else gets normalized first.
        $needsNormalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
4655
4656
    /**
     * Replaces every run of $tabLength spaces with a single tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4.
     *                          Values lower than 1 leave the string unchanged.</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // A run of zero spaces cannot be searched for and a negative
        // multiplier makes "str_repeat()" fail, so there is nothing to do.
        if ($tabLength <= 0) {
            return $str;
        }

        return \str_replace(\str_repeat(' ', $tabLength), "\t", $str);
    }
4666
4667
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is given, every element is split on its own (recursively) and an
     * array with the same keys is returned.
     *
     * @param int|int[]|string|string[] $str       <p>The string to split into array.</p>
     * @param int                       $length    [optional] <p>Max character length of each array element.</p>
     * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string[]|string[][] an array containing chunks of the string
     *                             (an array of such arrays for array input)
     */
    public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
    {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as $k => $v) {
                // forward all options, so that nested values are cleaned like top-level ones
                $str[$k] = self::split($v, $length, $cleanUtf8);
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        // init
        $ret = [];

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['pcre_utf8'] === true) {
            // one match per character ("s" lets "." match newlines as well)
            \preg_match_all('/./us', $str, $retArray);
            if (isset($retArray[0])) {
                $ret = $retArray[0];
            }
            unset($retArray);
        } else {
            // fallback: walk the bytes and group them by the length announced in the lead byte

            $len = self::strlen_in_byte($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
                // bytes that match none of the cases above are skipped
            }
        }

        if ($length > 1) {
            // glue $length characters together per element
            $ret = \array_chunk($ret, $length);

            return \array_map(
                static function ($item) {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
4789
4790
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // shared upper-casing step, used by both replacement passes below
        $upper = static function ($chunk) use ($encoding, $cleanUtf8, $lang, $tryToKeepStringLength) {
            return self::strtoupper($chunk, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        };

        $camelized = self::lcfirst(self::trim($str), $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        // drop leading dashes / underscores
        $camelized = (string) \preg_replace('/^[-_]+/', '', $camelized);

        // remove separators and upper-case the character that follows them (if any)
        $camelized = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            static function ($match) use ($upper) {
                return isset($match[1]) ? $upper($match[1]) : '';
            },
            $camelized
        );

        // upper-case the character that follows a run of digits
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            static function ($match) use ($upper) {
                return $upper($match[0]);
            },
            $camelized
        );
    }
4828
4829
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * Whitespace is collapsed first; afterwards the name helper is applied
     * for " " and then for "-" as word separator.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $spaceSeparated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($spaceSeparated, '-');
    }
4845
4846
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false. An empty $haystack or $needle always yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // only a fallback to prevent BC in the api ...
        // (a non-boolean third argument is taken as the encoding)
        if (\is_bool($caseSensitive) === false) {
            $encoding = (string) $caseSensitive;
        }

        $position = $caseSensitive
            ? self::strpos($haystack, $needle, 0, $encoding)
            : self::stripos($haystack, $needle, 0, $encoding);

        return $position !== false;
    }
4875
4876
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false. An empty $haystack or an empty list of
     * $needles yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not $haystack contains every one of the $needles
     */
    public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if ($haystack === '' || \count($needles) === 0) {
            return false;
        }

        // only a fallback to prevent BC in the api ...
        // (a non-boolean third argument is taken as the encoding)
        if (\is_bool($caseSensitive) === false) {
            $encoding = (string) $caseSensitive;
        }

        foreach ($needles as $needle) {
            $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
            if ($found === false) {
                // one miss is enough
                return false;
            }
        }

        return true;
    }
4911
4912
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false. An empty list of $needles yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
    {
        if (\count($needles) === 0) {
            return false;
        }

        foreach ($needles as $needle) {
            $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
            if ($found === true) {
                // first hit wins
                return true;
            }
        }

        return false;
    }
4939
4940
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * Shortcut for "str_delimit()" with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
4954
4955
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * The delimiter is inserted verbatim: sequences such as "$1" or "\1" inside
     * of it are not interpreted as regex back-references.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $str = self::trim($str);

        // Mark every uppercase letter that is not at a word boundary with a dash.
        // "\p{Lu}" also covers non-ASCII uppercase letters (e.g. "Ä"), which "[A-Z]" would miss.
        $str = (string) \preg_replace('/\B(\p{Lu})/u', '-\1', $str);

        $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        // A callback is used so that the delimiter is never parsed as a replacement pattern.
        return (string) \preg_replace_callback(
            '/[-_\s]+/u',
            static function () use ($delimiter) {
                return $delimiter;
            },
            $str
        );
    }
4988
4989
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary data (UTF-16 / UTF-32), ASCII, UTF-8,
     * "mb_detect_encoding()" (only with native mbstring) and finally "iconv()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            $utf16Type = self::is_utf16($str, false);
            if ($utf16Type === 1) {
                return 'UTF-16LE';
            }
            if ($utf16Type === 2) {
                return 'UTF-16BE';
            }

            $utf32Type = self::is_utf32($str, false);
            if ($utf32Type === 1) {
                return 'UTF-32LE';
            }
            if ($utf32Type === 2) {
                return 'UTF-32BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            // A candidate is accepted if converting it to itself leaves the string untouched.
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($roundTrip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5114
5115
    /**
     * Check if the string ends with the given substring (byte-wise, case-sensitive).
     *
     * An empty $haystack or an empty $needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needleLength = \strlen($needle);

        if ($needleLength === 0 || $haystack === '') {
            return false;
        }

        // compare the last "$needleLength" bytes of the haystack with the needle
        $tail = \substr($haystack, -$needleLength);

        return $tail === $needle;
    }
5131
5132
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list never enters the loop and therefore yields false
        foreach ($substrings as $candidate) {
            if (self::str_ends_with($str, $candidate) === true) {
                return true;
            }
        }

        return false;
    }
5156
5157
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        return self::str_starts_with($str, $substring)
            ? $str
            : $substring . $str;
    }
5174
5175
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        return self::str_ends_with($str, $substring)
            ? $str
            : $str . $substring;
    }
5191
5192
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // '_id' is removed first, the remaining underscores become spaces
        $humanized = self::str_replace(['_id', '_'], ['', ' '], $str);
        $humanized = self::trim($humanized);

        return self::ucfirst($humanized);
    }
5216
5217
    /**
     * Check if the string ends with the given substring, case insensitive.
     *
     * An empty $haystack or an empty $needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // the tail is cut by the byte length of the needle before it is compared
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5233
5234
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // an empty list never enters the loop and therefore yields false
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate) === true) {
                return true;
            }
        }

        return false;
    }
5258
5259
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin wrapper around "stripos()" of this class.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5281
5282
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin wrapper around "strripos()" of this class.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5305
5306
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin wrapper around "strpos()" of this class.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5328
5329
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin wrapper around "strrpos()" of this class.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
    {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5352
5353
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * If $index is greater than the length of the string, the string is
     * returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
    {
        $length = self::strlen($str, $encoding);

        if ($index > $length) {
            return $str;
        }

        // cut the string at $index and put the substring in between both parts
        $head = self::substr($str, 0, $index, $encoding);
        $tail = self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5376
5377
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * The search values are matched literally (they are quoted before they are used
     * as regex) and the replacements are inserted literally as well: "$1" or "\1"
     * inside of a replacement are not treated as regex back-references.
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       An empty search value never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value (or an array of values) for the found search values.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // turn every search value into a case-insensitive, literal UTF-8 pattern
        $patterns = \array_map(
            static function ($needle): string {
                $needle = (string) $needle;

                if ($needle === '') {
                    // pattern that can never match, so that an empty search value replaces nothing
                    return '/^(?<=.)$/';
                }

                return '/' . \preg_quote($needle, '/') . '/ui';
            },
            (array) $search
        );

        // escape "\" and "$", otherwise "preg_replace()" would interpret
        // sequences like "$0" or "\1" in the replacement as back-references
        $escapeReplacement = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replace = \is_array($replace)
            ? \array_map($escapeReplacement, $replace)
            : $escapeReplacement($replace);

        // "$count" is filled directly via the reference parameter
        return \preg_replace($patterns, $replace, $subject, -1, $count);
    }
5421
5422
    /**
     * Replaces $search from the beginning of string with $replacement (case-insensitive).
     *
     * The prefix test is a binary-safe, case-insensitive comparison of the
     * leading bytes, so only the first "strlen($search)" bytes are inspected.
     * NOTE(review): non-ASCII letters that differ only in case are presumably
     * not treated as equal by the byte-based comparison - confirm if needed.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Existing contract: an empty needle appends the replacement
        // (this also covers the case of an empty input string).
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // Compare only the prefix instead of scanning the whole haystack.
        if (\strncasecmp($str, $search, $searchLength) === 0) {
            return $replacement . \substr($str, $searchLength);
        }

        return $str;
    }
5453
5454
    /**
     * Replaces $search from the ending of string with $replacement (case-insensitive).
     *
     * A needle that is longer than the input can never be a suffix, so the
     * input is returned unchanged in that case (this also avoids handing an
     * out-of-range negative offset to "stripos()").
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // Existing contract: an empty needle appends the replacement
        // (this also covers the case of an empty input string).
        if ($search === '') {
            return $str . $replacement;
        }

        $strLength = \strlen($str);
        $searchLength = \strlen($search);

        if ($searchLength > $strLength) {
            return $str;
        }

        // Starting at this offset leaves exactly "strlen($search)" bytes,
        // therefore any hit is necessarily the suffix.
        if (\stripos($str, $search, $strLength - $searchLength) !== false) {
            $str = \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
5485
5486
    /**
     * Check if the string starts with the given substring, case insensitive.
     *
     * An empty haystack or an empty needle never counts as a match.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($haystack !== '' && $needle !== '') {
            // a match only counts when it is found at the very first position
            return self::stripos($haystack, $needle) === 0;
        }

        return false;
    }
5502
5503
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // nothing can match an empty input or an empty candidate list
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5531
5532
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string when one of the inputs is empty or when the
     * separator is not found.
     * NOTE(review): $encoding is not forwarded to "str_iindex_first()" - confirm
     * whether that lookup should receive it as well.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::str_iindex_first($str, $separator);
        if ($position === false) {
            return '';
        }

        // continue right behind the separator
        $start = $position + self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
5563
5564
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * Returns an empty string when one of the inputs is empty or when the
     * separator is not found.
     * NOTE(review): $encoding is not forwarded to "str_iindex_last()" - confirm
     * whether that lookup should receive it as well.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::str_iindex_last($str, $separator);
        if ($position === false) {
            return '';
        }

        // continue right behind the separator
        $start = $position + self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
5595
5596
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * Returns an empty string when one of the inputs is empty or when the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::str_iindex_first($str, $separator);

        // everything in front of the separator, or nothing if it is missing
        return $position === false
            ? ''
            : (string) self::substr($str, 0, $position, $encoding);
    }
5622
5623
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * Returns an empty string when one of the inputs is empty or when the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $position = self::str_iindex_last($str, $separator);

        // everything in front of the separator, or nothing if it is missing
        return $position === false
            ? ''
            : (string) self::substr($str, 0, $position, $encoding);
    }
5649
5650
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * The work is delegated to "stristr()"; a failed lookup is mapped to an
     * empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
5682
5683
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * The work is delegated to "strrichr()"; a failed lookup is mapped to an
     * empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
5710
5711
    /**
     * Returns the last $n characters of the string.
     *
     * A non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0) {
            return '';
        }

        // a negative start offset counts from the end of the string
        $tail = self::substr($str, -$n, null, $encoding);
        if ($tail === false) {
            return '';
        }

        return $tail;
    }
5730
5731
    /**
     * Limit the number of characters in a string.
     *
     * When the input is longer than $length it is cut so that the kept part
     * plus $strAddOn is $length characters long. If the add-on alone is
     * already longer than $length, only the add-on is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if (self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on with the same encoding as the input and never
        // pass a negative length on: "substr()" would then cut relative to the
        // end of the string and the result could exceed the limit by far.
        $keep = \max(0, $length - (int) self::strlen($strAddOn, $encoding));

        // "substr()" may report a failure as false, hence the explicit cast.
        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
5757
5758
    /**
     * Limit the number of characters in a string, but also after the next word.
     *
     * If the character at the limit is a space, the string is cut right in
     * front of it; otherwise the last (incomplete) word of the truncated
     * string is dropped. In both cases $strAddOn is appended.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string
     */
    public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if (self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // the limit falls exactly on a word boundary
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
        }

        $truncated = (string) self::substr($str, 0, $length, $encoding);

        // keep everything in front of the last space (nothing if there is none)
        $lastSpace = \strrpos($truncated, ' ');
        $head = $lastSpace === false ? '' : \substr($truncated, 0, $lastSpace);

        if ($head !== '') {
            return $head . $strAddOn;
        }

        // no complete word is left: fall back to a hard cut
        return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
    }
5799
5800
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are compared character by character from the start until
     * the first difference or until the shorter string ends.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

        $prefix = '';
        $position = 0;
        while ($position < $limit) {
            $char = self::substr($str, $position, 1, $encoding);

            if ($char !== self::substr($otherStr, $position, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$position;
        }

        return $prefix;
    }
5826
5827
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with its $str being the longest common substring
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $strLength = self::strlen($str, $encoding);
        $otherLength = self::strlen($otherStr, $encoding);

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Extract the characters of the second string only once instead of
        // once per cell of the table (index 1..$otherLength, like the table).
        $otherChars = [];
        for ($j = 1; $j <= $otherLength; ++$j) {
            $otherChars[$j] = self::substr($otherStr, $j - 1, 1, $encoding);
        }

        $len = 0; // length of the best match so far
        $end = 0; // (1-based) position in $str where the best match ends
        $table = \array_fill(
            0,
            $strLength + 1,
            \array_fill(0, $otherLength + 1, 0)
        );

        for ($i = 1; $i <= $strLength; ++$i) {
            // the character of the first string is invariant for the inner loop
            $strChar = self::substr($str, $i - 1, 1, $encoding);

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar !== $otherChars[$j]) {
                    // the cell keeps its initial value 0
                    continue;
                }

                $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

                // strictly greater: on ties the first occurrence wins
                if ($table[$i][$j] > $len) {
                    $len = $table[$i][$j];
                    $end = $i;
                }
            }
        }

        $returnTmp = self::substr($str, $end - $len, $len, $encoding);

        return $returnTmp === false ? '' : $returnTmp;
    }
5878
5879
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * Both strings are compared character by character from the end until the
     * first difference or until the shorter string is exhausted.
     *
     * @param string $str
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

        $suffix = '';
        $distance = 1; // distance from the end of both strings
        while ($distance <= $limit) {
            $char = self::substr($str, -$distance, 1, $encoding);

            if ($char !== self::substr($otherStr, -$distance, 1, $encoding)) {
                break;
            }

            // walking backwards, so new characters are prepended
            $suffix = $char . $suffix;
            ++$distance;
        }

        return $suffix;
    }
5905
5906
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u"
     * (unicode) modifier, so it has to be passed without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // "preg_match()" yields 1 on a match, 0 on no match and false on error
        return \preg_match($regex, $str) === 1;
    }
5918
5919
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = self::strlen($str, $encoding);

        // non-negative offsets are zero-based, negative ones count from the end
        return $offset >= 0
            ? $length > $offset
            : $length >= \abs($offset);
    }
5941
5942
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure with the requested encoding, so that the bounds check agrees
        // with the character lookup below.
        $length = self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
5971
5972
    /**
     * Pad a UTF-8 string to given length with another string.
     *
     * The input is returned unchanged when it is empty, when $pad_string is
     * empty (there is nothing to pad with) or when $pad_length is not positive
     * or smaller than the length of the input.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $pad_length <p>The length of return string.</p>
     * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int    $pad_type   [optional] <p>
     *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
     *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
     *                           </p>
     * @param string $encoding   [optional] <p>Default: UTF-8</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right' or 'both'
     *
     * @return string returns the padded string
     */
    public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = \STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Besides the "STR_PAD_*" constants the textual aliases are accepted.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // An empty pad string would lead to a division by zero below.
        if ($pad_string === '') {
            return $str;
        }

        $str_length = self::strlen($str, $encoding);

        if ($pad_length <= 0 || $pad_length < $str_length) {
            return $str;
        }

        $ps_length = self::strlen($pad_string, $encoding);

        // number of characters that have to be added
        $diff = ($pad_length - $str_length);

        // Each side is built by repeating the pad string often enough and
        // cutting the result down to the exact number of characters.
        switch ($pad_type) {
            case \STR_PAD_LEFT:
                $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                $pre = (string) self::substr($pre, 0, $diff, $encoding);
                $post = '';

                break;

            case \STR_PAD_BOTH:
                // an odd remainder goes to the right-hand side
                $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                $pre = (string) self::substr($pre, 0, (int) \floor($diff / 2), $encoding);
                $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
                $post = (string) self::substr($post, 0, (int) \ceil($diff / 2), $encoding);

                break;

            case \STR_PAD_RIGHT:
            default:
                $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
                $post = (string) self::substr($post, 0, $diff, $encoding);
                $pre = '';
        }

        return $pre . $str . $post;
    }
6045
6046
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Alias for pad() with a $padType of 'both'.
     *
     * The missing characters are split between both sides; an odd remainder
     * is added on the right.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        $missing = $length - self::strlen($str, $encoding);

        $left = (int) \floor($missing / 2);
        $right = (int) \ceil($missing / 2);

        return self::apply_padding($str, $left, $right, $padStr, $encoding);
    }
6063
6064
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Alias for pad() with a $padType of 'left'.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // measure the input with the requested encoding (like "str_pad_both()")
        $missing = $length - self::strlen($str, $encoding);

        return self::apply_padding($str, $missing, 0, $padStr, $encoding);
    }
6079
6080
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Alias for pad() with a $padType of 'right'.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // measure the input with the requested encoding (like "str_pad_both()")
        $missing = $length - self::strlen($str, $encoding);

        return self::apply_padding($str, 0, $missing, $padStr, $encoding);
    }
6095
6096
    /**
     * Repeat a string.
     *
     * The input is passed through "filter()" before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6120
6121
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace($search, $replace, $subject, int &$count = null)
    {
        // plain delegation; "$count" is filled by the native function
        $result = \str_replace($search, $replace, $subject, $count);

        return $result;
    }
6154
6155
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The prefix test is a binary-safe comparison of the leading bytes, so
     * only the first "strlen($search)" bytes of the input are inspected.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // Existing contract: an empty needle appends the replacement
        // (this also covers the case of an empty input string).
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // Compare only the prefix instead of scanning the whole haystack.
        if (\strncmp($str, $search, $searchLength) === 0) {
            return $replacement . \substr($str, $searchLength);
        }

        return $str;
    }
6186
6187
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * An empty "$search" never matches a suffix: in that case "$replacement" is appended to "$str".
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The suffix to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacement, or the unchanged input if the suffix is absent
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        // Byte offset at which the suffix would have to start. It is negative when "$search" is
        // longer than "$str"; that case must be rejected up front instead of being handed to a
        // string function as an out-of-range offset.
        $suffixOffset = \strlen($str) - \strlen($search);

        if ($suffixOffset >= 0 && \substr($str, $suffixOffset) === $search) {
            return \substr($str, 0, $suffixOffset) . $replacement;
        }

        return $str;
    }
6218
6219
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The text that takes its place.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string the subject with the first occurrence replaced, or the unchanged subject if there is none
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $pos = self::strpos($subject, $search);
        if ($pos === false) {
            return $subject;
        }

        // self::strlen() is reported as being able to return false; the cast makes sure that
        // substr_replace() always receives an integer length.
        return self::substr_replace($subject, $replace, $pos, (int) self::strlen($search));
    }
6237
6238
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The text that takes its place.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string the subject with the last occurrence replaced, or the unchanged subject if there is none
     */
    public static function str_replace_last(string $search, string $replace, string $subject): string
    {
        $pos = self::strrpos($subject, $search);
        if ($pos === false) {
            return $subject;
        }

        // self::strlen() is reported as being able to return false; the cast makes sure that
        // substr_replace() always receives an integer length.
        return self::substr_replace($subject, $replace, $pos, (int) self::strlen($search));
    }
6256
6257
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str <p>The input string</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str): string
    {
        $length = (int) self::strlen($str);
        if ($length < 1) {
            // Without this guard range(0, -1) would produce the indexes [0, -1] and trigger
            // two pointless substring lookups on an empty input.
            return '';
        }

        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // rebuild the string by picking one character per shuffled position
        $shuffledStr = '';
        foreach ($indexes as $i) {
            $shuffledStr .= self::substr($str, $i, 1);
        }

        return $shuffledStr;
    }
6279
6280
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
    {
        // The string is measured with the same "$encoding" that the extraction uses, and the
        // result is cast because self::strlen() is reported as being able to return false.
        if ($end === null) {
            $length = (int) self::strlen($str, $encoding);
        } elseif ($end >= 0 && $end <= $start) {
            // the end index is not behind the start index: nothing to extract
            return '';
        } elseif ($end < 0) {
            // a negative end index counts from the end of the string
            $length = (int) self::strlen($str, $encoding) + $end - $start;
        } else {
            $length = $end - $start;
        }

        return self::substr($str, $start, $length, $encoding);
    }
6309
6310
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Hyphens and whitespace become "_", every ASCII capital letter is lower-cased and prefixed
     * with "_", every ASCII digit is wrapped in "_", and runs of "_" are collapsed afterwards.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        $str = self::normalize_whitespace($str);
        $str = \str_replace('-', '_', $str);

        $str = (string) \preg_replace_callback(
            // Digits and capitals only. The class must not contain "|": inside a character class
            // it is a literal pipe, not an alternation, and would make "|" count as a capital.
            '/([\dA-Z])/u',
            static function ($matches) use ($encoding) {
                $match = $matches[1];
                $matchInt = (int) $match;

                // the round trip through int only survives for a digit
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\s+/',        // convert spaces to "_"
                '/^\s+|\s+$/',  // trim leading & trailing spaces
                '/_+/',         // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        $str = self::trim($str, '_'); // trim leading & trailing "_"

        return self::trim($str); // trim leading & trailing whitespace
    }
6356
6357
    /**
     * Orders the characters of a string by their code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>If <strong>true</strong>, every character is kept only once.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, the highest code point comes first.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice removes duplicate values
            $flipped = \array_flip($codePoints);
            $codePoints = \array_flip($flipped);
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
6382
6383
    /**
     * alias for "UTF8::split()"
     *
     * @see UTF8::split()
     *
     * @param string|string[] $str <p>The value that is handed to "UTF8::split()".</p>
     * @param int             $len <p>Forwarded unchanged as the second argument. Default: 1</p>
     *
     * @return string[]
     */
    public static function str_split($str, int $len = 1): array
    {
        // pure delegation, no extra logic lives here
        $chunks = self::split($str, $len);

        return $chunks;
    }
6397
6398
    /**
     * Splits the string at every occurrence of "$pattern" and returns the pieces.
     *
     * "$pattern" is escaped via preg_quote() before it is used, so it is matched literally.
     * With a positive "$limit" at most that many pieces are returned and the unsplit remainder
     * is discarded.
     *
     * @param string $str
     * @param string $pattern <p>The delimiter at which the string is split.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        // an empty delimiter cannot split anything: hand the input back as the only piece
        if ($pattern === '') {
            return [$str];
        }

        // preg_split() puts the unsplit remainder into the last slot when a limit is given,
        // so one extra slot is requested here and dropped again below
        $pregLimit = $limit > 0 ? $limit + 1 : -1;

        $pieces = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $pregLimit);
        if ($pieces === false) {
            return [];
        }

        if ($pregLimit > 0 && \count($pieces) === $pregLimit) {
            \array_pop($pieces);
        }

        return $pieces;
    }
6441
6442
    /**
     * Tells whether "$haystack" begins with "$needle" (byte-wise, case-sensitive).
     *
     * An empty haystack or an empty needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        return $haystack !== ''
               &&
               $needle !== ''
               &&
               \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
6458
6459
    /**
     * Tells whether the string begins with at least one of the given candidates.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool true as soon as one candidate matches, false for an empty input or an empty list
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        // an empty list simply skips the loop and falls through to "false"
        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6487
6488
    /**
     * Returns everything that follows the first occurrence of the separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string the part behind the separator; an empty string if the input or the separator
     *                is empty, or if the separator does not occur
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $separatorPos = self::str_index_first($str, $separator);
        if ($separatorPos === false) {
            return '';
        }

        // skip the separator itself
        $from = $separatorPos + self::strlen($separator, $encoding);

        return (string) self::substr($str, $from, null, $encoding);
    }
6519
6520
    /**
     * Returns everything that follows the last occurrence of the separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string the part behind the separator; an empty string if the input or the separator
     *                is empty, or if the separator does not occur
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $separatorPos = self::str_index_last($str, $separator);
        if ($separatorPos === false) {
            return '';
        }

        // skip the separator itself
        $from = $separatorPos + self::strlen($separator, $encoding);

        return (string) self::substr($str, $from, null, $encoding);
    }
6551
6552
    /**
     * Returns everything that precedes the first occurrence of the separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string the part in front of the separator; an empty string if the input or the
     *                separator is empty, or if the separator does not occur
     */
    public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $separatorPos = self::str_index_first($str, $separator);
        if ($separatorPos === false) {
            return '';
        }

        // the separator position doubles as the length of the leading part
        return (string) self::substr($str, 0, $separatorPos, $encoding);
    }
6583
6584
    /**
     * Returns everything that precedes the last occurrence of the separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string the part in front of the separator; an empty string if the input or the
     *                separator is empty, or if the separator does not occur
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $separatorPos = self::str_index_last($str, $separator);
        if ($separatorPos === false) {
            return '';
        }

        // the separator position doubles as the length of the leading part
        return (string) self::substr($str, 0, $separatorPos, $encoding);
    }
6615
6616
    /**
     * Cuts the string at the first occurrence of "$needle" by delegating to "UTF8::strstr()".
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Forwarded to "UTF8::strstr()". Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string the part delivered by "UTF8::strstr()"; an empty string if the input or the
     *                needle is empty, or if nothing was found
     */
    public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strstr($str, $needle, $beforeNeedle, $encoding);

        // "false" signals "not found" and is mapped to an empty string
        return $found === false ? '' : $found;
    }
6648
6649
    /**
     * Cuts the string at the last occurrence of "$needle" by delegating to "UTF8::strrchr()".
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Forwarded to "UTF8::strrchr()". Default: false</p>
     * @param string $encoding     [optional] <p>Default: UTF-8</p>
     *
     * @return string the part delivered by "UTF8::strrchr()"; an empty string if the input or the
     *                needle is empty, or if nothing was found
     */
    public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrchr($str, $needle, $beforeNeedle, $encoding);

        // "false" signals "not found" and is mapped to an empty string
        return $found === false ? '' : $found;
    }
6676
6677
    /**
     * Wraps "$str" in "$substring", i.e. puts it in front of and behind the string.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return $substring . $str . $substring;
    }
6689
6690
    /**
     * Capitalizes the first letter of every word and lower-cases the rest of it.
     * Words listed in $ignore are left exactly as they are.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($useTrimFirst === true) {
            $str = self::trim($str);
        }

        $pieces = self::str_to_words($str);

        // pieces are rewritten in place via their key, the glue between words is converted as well
        foreach ($pieces as $index => $piece) {
            if ($ignore && \in_array($piece, $ignore, true)) {
                continue;
            }

            $lowered = self::strtolower(
                $piece,
                $encoding,
                $cleanUtf8,
                $lang,
                $tryToKeepStringLength
            );

            $pieces[$index] = self::str_upper_first(
                $lowered,
                $encoding,
                $cleanUtf8,
                $lang,
                $tryToKeepStringLength
            );
        }

        return \implode('', $pieces);
    }
6745
6746
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * NOTE: the entries of $ignore are merged into the list of "small words" and end up
     * unescaped inside the regular expressions below, exactly like the built-in entries.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        // built-in small words (regex fragments) plus the caller supplied ones
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = self::trim($str);

        // An input without any lower-case character (e.g. all caps) is lower-cased first,
        // using the same encoding as every other case conversion in this method.
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // The main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            static function ($matches) use ($encoding) {
                // Preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // Preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // Lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // Capitalize word w/o internal caps
                    $str .= static::str_upper_first($matches[4], $encoding);
                } else {
                    // Preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // Preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            static function ($matches) use ($encoding) {
                return $matches[1] . static::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            static function ($matches) use ($encoding) {
                return static::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            static function ($matches) use ($encoding) {
                return static::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below tests for "…" although its inline comment talks
        // about a hyphen; left untouched here, please confirm which one is intended.
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            static function ($matches) use ($encoding) {
                return $matches[1] . static::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
6890
6891
    /**
     * Get a binary representation of a specific string.
     *
     * The hex dump of the bytes of "$str" is converted digit by digit into base 2. Leading zero
     * bits are not part of the result, and an empty string yields "0".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string a string that consists of "0" and "1" only
     */
    public static function str_to_binary(string $str): string
    {
        $unpacked = \unpack('H*', $str);
        if ($unpacked === false) {
            return '0';
        }

        // Every hex digit maps to exactly four bits. Unlike base_convert(), which loses
        // precision on large numbers, this stays exact for inputs of any length.
        $bits = \strtr(
            $unpacked[1],
            [
                '0' => '0000',
                '1' => '0001',
                '2' => '0010',
                '3' => '0011',
                '4' => '0100',
                '5' => '0101',
                '6' => '0110',
                '7' => '0111',
                '8' => '1000',
                '9' => '1001',
                'a' => '1010',
                'b' => '1011',
                'c' => '1100',
                'd' => '1101',
                'e' => '1110',
                'f' => '1111',
            ]
        );

        // same shape as a plain base conversion: no leading zeros, but at least one digit
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
6904
6905
    /**
     * Splits a string into its lines.
     *
     * "\r\n", "\r" and "\n" each count as exactly one line break, so empty lines stay in the
     * result unless they are removed via "$removeEmptyValues".
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        // "\r\n" has to be tried first, otherwise it would be seen as two separate breaks.
        // (A quantified class like [\r\n]{1,2} would also swallow "\n\n", i.e. an empty line.)
        $return = \preg_split("/\r\n|\r|\n/u", $str);

        if ($return === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        // no filtering requested: the raw lines are the result
        if (
            $removeShortValues === null
            &&
            $removeEmptyValues === false
        ) {
            return $return;
        }

        return self::reduce_string_array(
            $return,
            $removeEmptyValues,
            $removeShortValues
        );
    }
6938
6939
    /**
     * Breaks a string up into words and the text between them.
     *
     * The split keeps its delimiters (PREG_SPLIT_DELIM_CAPTURE), so words and the pieces in
     * between them alternate in the result.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        // character class of everything that counts as part of a word
        $charList = self::rxClass($charList, '\pL');

        $pieces = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        $filterRequested = $removeShortValues !== null || $removeEmptyValues !== false;
        if (!$filterRequested) {
            return $pieces;
        }

        $filtered = self::reduce_string_array(
            $pieces,
            $removeEmptyValues,
            $removeShortValues
        );

        // make sure that every remaining entry really is a string
        foreach ($filtered as $key => $value) {
            $filtered[$key] = (string) $value;
        }

        return $filtered;
    }
6983
6984
    /**
     * Alias of "UTF8::to_ascii()": all arguments are forwarded unchanged.
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
6999
7000
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If $substring is itself longer than $length it cannot fit, so it is left out and the
     * string is simply cut to $length.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        // already short enough -> nothing to cut
        if ($length >= self::strlen($str, $encoding)) {
            return $str;
        }

        $substringLength = (int) self::strlen($substring, $encoding);
        if ($substringLength > $length) {
            // Subtracting would make the length negative, and substr() would then cut from
            // the END of the string, producing a result longer than requested.
            $substring = '';
            $substringLength = 0;
        }

        // leave room for the substring that gets appended
        $truncated = self::substr($str, 0, $length - $substringLength, $encoding);

        // substr() may return false; treat that as "nothing left"
        return (string) $truncated . $substring;
    }
7033
7034
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * If $substring is itself longer than $length it cannot fit, so it is left out.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: UTF-8</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
    {
        if ($length >= self::strlen($str, $encoding)) {
            return $str;
        }

        $substringLength = (int) self::strlen($substring, $encoding);
        if ($substringLength > $length) {
            // Subtracting would make the length negative, and substr() would then cut from
            // the END of the string, producing a result longer than requested.
            $substring = '';
            $substringLength = 0;
        }

        // need to further trim the string so we can append the substring
        $length -= $substringLength;

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // if the last word was truncated (i.e. the char right after the cut is not a space)
        $strPosSpace = self::strpos($str, ' ', $length - 1, $encoding);
        if ($strPosSpace !== $length) {
            // find pos of the last occurrence of a space, get up to that
            $lastPos = self::strrpos($truncated, ' ', 0, $encoding);

            // when there is no space inside the truncated part but one follows later,
            // (int) false === 0 drops the partial word completely
            if ($lastPos !== false || $strPosSpace !== false) {
                $truncated = self::substr($truncated, 0, (int) $lastPos, $encoding);
            }
        }

        // substr() may return false; treat that as "nothing left"
        return (string) $truncated . $substring;
    }
7075
7076
    /**
     * Returns the string delimited by underscores.
     *
     * This is "UTF8::str_delimit()" called with "_" as the delimiter.
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
7090
7091
    /**
     * Returns an UpperCamelCase version of the supplied string.
     *
     * The string is first camelized via "UTF8::str_camelize()", then its first character is
     * upper-cased via "UTF8::str_upper_first()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: UTF-8</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        $camelized = self::str_camelize($str, $encoding);

        // the remaining options only affect the upper-casing of the first character
        return self::str_upper_first(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
7108
7109
    /**
     * Alias of "UTF8::ucfirst()": all arguments are forwarded unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        return self::ucfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
7126
7127
    /**
     * Counts the words in a UTF-8 string, or returns the words themselves.
     *
     * Works on the result of "UTF8::str_to_words()", where the words sit at the odd indexes
     * and the text between them at the even indexes.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $pieces = self::str_to_words($str, $charlist);
        $pieceCount = \count($pieces);

        if ($format === 1) {
            // plain list of words
            $words = [];
            for ($idx = 1; $idx < $pieceCount; $idx += 2) {
                $words[] = $pieces[$idx];
            }

            return $words;
        }

        if ($format === 2) {
            // words keyed by their character offset in $str
            $words = [];
            $offset = self::strlen($pieces[0]);
            for ($idx = 1; $idx < $pieceCount; $idx += 2) {
                $words[$offset] = $pieces[$idx];
                // advance past this word and the non-word text that follows it
                $offset += self::strlen($pieces[$idx]) + self::strlen($pieces[$idx + 1]);
            }

            return $words;
        }

        // every other format value: just the number of words
        return (int) (($pieceCount - 1) / 2);
    }
7164
7165
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp() - both strings are case-folded via
     * "UTF8::strtocasefold()" and then compared with "UTF8::strcmp()".
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
7186
7187
    /**
     * Alias of "UTF8::strstr()": all arguments are forwarded unchanged.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        return self::strstr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );
    }
7204
7205
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are normalized to NFD before they are compared byte-wise. If either string
     * cannot be normalized (e.g. it is not valid UTF-8), the raw strings are compared instead.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical bytes -> no need to normalize anything
        if ($str1 === $str2) {
            return 0;
        }

        /** @noinspection PhpUndefinedClassInspection */
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @noinspection PhpUndefinedClassInspection */
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // normalize() returns false on failure; comparing false with false would report two
        // different (invalid) strings as equal, so fall back to the untouched input.
        if ($normalized1 === false || $normalized2 === false) {
            return \strcmp($str1, $str2);
        }

        return \strcmp($normalized1, $normalized2);
    }
7224
7225
    /**
     * Find length of initial segment not matching mask.
     *
     * @param string   $str
     * @param string   $charList <p>The characters that end the initial segment.</p>
     * @param int      $offset   <p>If non-zero, only the part of $str starting here is examined.</p>
     * @param int|null $length   <p>If not null, only that many characters are examined.</p>
     *
     * @return int|null
     *                  the length of the leading part of (the selected part of) $str that contains
     *                  no character from $charList,<br>
     *                  or <strong>null</strong> if $charList is empty, the selected part is empty or
     *                  the length cannot be determined
     */
    public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
    {
        if ($charList === '') {
            return null;
        }

        // restrict the search to the requested window first
        if ($offset || $length !== null) {
            $strTmp = self::substr($str, $offset, $length);
            if ($strTmp === false) {
                return null;
            }
            $str = $strTmp;
        }

        if ($str === '') {
            return null;
        }

        // Use a dedicated variable for the matches instead of overwriting the int parameter
        // $length with an array.
        $matches = [];
        $subject = \preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)
            ? $matches[1] // everything in front of the first char from $charList
            : $str; // no char from $charList found -> the whole string counts

        // strlen() can report false; keep the documented "int|null" contract
        $segmentLength = self::strlen($subject);

        return $segmentLength === false ? null : $segmentLength;
    }
7259
7260
    /**
     * Alias of "UTF8::stristr()": all arguments are forwarded unchanged.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        return self::stristr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );
    }
7277
7278
    /**
     * Create a UTF-8 string from code points.
     *
     * Every array value is passed to "UTF8::chr()" and the results are concatenated in
     * array order.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $result = '';

        foreach ($array as $codePoint) {
            $result .= self::chr($codePoint);
        }

        return $result;
    }
7300
7301
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is tested against every key of self::$BOM.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // only the keys (the BOM byte sequences) matter here, not their stored values
        foreach (\array_keys(self::$BOM) as $bom) {
            $position = \strpos($str, $bom);
            if ($position === 0) {
                return true;
            }
        }

        return false;
    }
7320
7321
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // Do not forward null: before PHP 8 the native function expects a string here, so a
        // null argument is rejected when strict types are enabled.
        if ($allowable_tags === null) {
            return \strip_tags($str);
        }

        return \strip_tags($str, $allowable_tags);
    }
7353
7354
    /**
     * Remove every whitespace character from the string.
     *
     * Whitespace is whatever the "[[:space:]]" class matches in a "u"-mode regex.
     *
     * @param string $str
     *
     * @return string the string without whitespace (an empty string if the replacement fails)
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
7371
7372
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * Tries mbstring first, then intl (UTF-8 and non-negative offset only), then the native
     * function for pure ASCII input, and finally a case-fold + "UTF8::strpos()" fallback.
     * A "not found" from one strategy falls through to the next one.
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" report a wrong position
            // when invalid characters precede $needle in $haystack
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // 1st try: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $position = \mb_stripos($haystack, $needle, $offset, $encoding);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // 2nd try: intl - "grapheme_stripos()" can neither handle other encodings
        // nor a negative offset
        //

        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        $asciiOnly = self::is_ascii($haystack) && self::is_ascii($needle);
        if ($asciiOnly) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: compare the case-folded strings
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
7445
7446
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * The search is case-insensitive. mbstring is used when available; otherwise intl
     * (UTF-8 only), the native function (pure ASCII input) or a regex are tried in that order.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if the cleaning removed everything.
        // Compare strictly: a loose "!$needle" check would also treat the needle "0" as empty.
        if ($needle === '') {
            return $haystack;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        if (self::is_ascii($needle) && self::is_ascii($haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        // last resort: capture everything in front of the first case-insensitive match
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // substr() expects an int offset, strlen() may report false
        return self::substr($haystack, (int) self::strlen($match[1]));
    }
7524
7525
    /**
     * Get the string length, not the byte-length!
     *
     * Strategies are tried in this order: byte length (CP850 / ASCII encoding), mbstring,
     * iconv, intl (UTF-8 only), native strlen (pure ASCII input), regex.
     *
     * @see     http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return self::strlen_in_byte($str);
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" report a wrong length
            // when $str contains invalid characters
            $str = self::clean($str);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $noEncodingAwareExtension = self::$SUPPORT['mbstring'] === false
                                    && self::$SUPPORT['iconv'] === false;
        if ($encoding !== 'UTF-8' && $noEncodingAwareExtension) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $length = \mb_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $length = \iconv_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        //
        // fallback via intl - "grapheme_strlen()" can't handle other encodings
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $length = \grapheme_strlen($str);
            if ($length !== null) {
                return $length;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the characters a "u"-mode regex can see
        //

        \preg_match_all('/./us', $str, $chars);

        $length = \count($chars[0]);

        // nothing matched although the string has at least one byte -> the length is unknown
        if ($length === 0 && isset($str[0])) {
            return false;
        }

        return $length;
    }
7642
7643
    /**
     * Get string length in byte.
     *
     * Uses the native strlen(), unless self::$SUPPORT reports that mbstring function
     * overloading is active; in that case "mb_strlen()" with an 8-bit charset is used.
     *
     * @param string $str
     *
     * @return int
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strlen($str);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strlen($str, 'CP850'); // 8-BIT
    }
7667
7668
    /**
     * Case insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp() - both strings are case-folded via
     * "UTF8::strtocasefold()" and then compared with "UTF8::strnatcmp()".
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
7689
7690
    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: natural order version of UTF8::strcmp() - identical strings short-circuit to 0,
     * otherwise both strings go through "UTF8::strtonatfold()" before the native comparison.
     *
     * @see  http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 . '' === $str2 . '') {
            return 0;
        }

        $folded1 = self::strtonatfold($str1);
        $folded2 = self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }
7709
7710
    /**
     * Case-insensitive comparison of the first n characters of two strings.
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then compared
     * with UTF8::strncmp().
     *
     * @see  http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
    {
        $foldedFirst = self::strtocasefold($str1, true, false, $encoding, null, false);
        $foldedSecond = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($foldedFirst, $foldedSecond, $len);
    }
7733
7734
    /**
     * Comparison of the first n characters of two strings.
     *
     * Each string is cut down to its first $len characters via UTF8::substr()
     * and the two prefixes are compared with UTF8::strcmp().
     *
     * @see  http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     * @param int    $len  <p>Number of characters to use in the comparison.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(string $str1, string $str2, int $len): int
    {
        // a false result from substr() is cast to an empty string
        $prefixFirst = (string) self::substr($str1, 0, $len);
        $prefixSecond = (string) self::substr($str2, 0, $len);

        return self::strcmp($prefixFirst, $prefixSecond);
    }
7755
7756
    /**
     * Search a string for any character out of a given set.
     *
     * @see  http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // build a character class out of the list and look for its first hit
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $match)) {
            return false;
        }

        // cut the haystack at the byte position of the matched character
        $bytePos = (int) \strpos($haystack, $match[0]);

        return \substr($haystack, $bytePos);
    }
7778
7779
    /**
     * Find position of first occurrence of string in a string.
     *
     * The lookup tries several back-ends in order: a byte-wise lookup for
     * "CP850" / "ASCII", then mbstring, intl (grapheme), iconv, a plain
     * strpos() for ASCII-only input and finally a vanilla PHP fallback.
     * A back-end that reports "not found" lets the next one have a try.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::strpos_in_byte($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack. Translate it
        // into the absolute character position at which the search starts
        // *before* the haystack is truncated, so that the returned position
        // refers to the original haystack and not to the truncated tail.
        $startPos = $offset;
        if ($offset < 0) {
            $startPos = (int) self::strlen($haystack, $encoding) + $offset;
            if ($startPos < 0) {
                $startPos = 0;
            }
        }

        if ($haystackIsAscii) {
            $haystackTmp = \substr($haystack, $offset);
        } else {
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        }
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        $offset = $startPos;

        // byte position of the needle inside the truncated haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position
            return $offset + (self::strlen(\substr($haystack, 0, $pos), $encoding));
        }

        return $offset + 0;
    }
7929
7930
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
7963
7964
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * mbstring is used when available. Otherwise a byte-wise strrchr() is used
     * for "CP850" / "ASCII" (only if $before_needle is false), followed by
     * position-based fallbacks that search for the first character of the needle.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // positions are reported wrongly if invalid characters
            // are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isByteEncoding = ($encoding === 'CP850' || $encoding === 'ASCII');
        if ($before_needle === false && $isByteEncoding) {
            return \strrchr($haystack, $needle);
        }

        //
        // fallback via iconv or vanilla php
        // (both only search for the first character of the needle)
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        if (self::$SUPPORT['iconv'] === true) {
            $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $lastPos = self::strrpos($haystack, $needle, null, $encoding);
        }
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
8077
8078
    /**
     * Reverses the order of the characters in the string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $result = '';

        // walk from the last character down to the first one;
        // a false length is cast to 0, so the loop is skipped in that case
        for ($index = (int) self::strlen($str) - 1; $index >= 0; --$index) {
            $result .= self::substr($str, $index, 1);
        }

        return $result;
    }
8099
8100
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * mbstring is used when available; otherwise the position of the first
     * character of the needle is looked up via UTF8::strripos().
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // positions are reported wrongly if invalid characters
            // are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php (only the first character of the needle is searched)
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }

        $lastPos = self::strripos($haystack, (string) $firstChar, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
8170
8171
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * Back-ends are tried in this order: byte-wise lookup for "CP850" / "ASCII",
     * mbstring, intl (grapheme), byte-wise lookup for ASCII-only input and
     * finally case-folding both strings and delegating to UTF8::strrpos().
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        // "grapheme_strripos()" can't handle other encodings or a negative offset
        $intlUsable = $encoding === 'UTF-8'
                      && $offset >= 0
                      && self::$SUPPORT['intl'] === true;
        if ($intlUsable) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strripos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: fold the case, then do a case-sensitive lookup
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
    }
8276
8277
    /**
     * Finds the byte position of the last occurrence of a string within another, case insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset
        return \mb_strripos($haystack, $needle, $offset, 'CP850');
    }
8312
8313
    /**
     * Find position of last occurrence of a string in a string.
     *
     * Back-ends are tried in this order: byte-wise lookup for "CP850" / "ASCII",
     * mbstring, intl (grapheme), byte-wise lookup for ASCII-only input and
     * finally a vanilla PHP fallback.
     *
     * A null $offset (the default) is treated as 0 wherever an integer is
     * required, so it is never forwarded to a function that only accepts int.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            // strrpos_in_byte() only accepts an int offset, so map null to 0
            return self::strrpos_in_byte($haystack, $needle, $offset ?? 0);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // never hand null to the int offset parameter of mb_strrpos()
            return \mb_strrpos($haystack, $needle, $offset ?? 0, $encoding);
        }

        //
        // fallback via intl
        //

        if (
            $offset !== null
            &&
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (
            $offset !== null
            &&
            self::is_ascii($haystack)
            &&
            self::is_ascii($needle)
        ) {
            return self::strrpos_in_byte($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $haystackTmp = null;
        if ($offset > 0) {
            // skip the first $offset characters
            $haystackTmp = self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            // drop the last |$offset| characters
            $haystackTmp = self::substr($haystack, 0, $offset);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // byte position of the last hit inside the (maybe shortened) haystack
        $pos = self::strrpos_in_byte($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // convert the byte position into a character position
        return (int) $offset + self::strlen(self::substr_in_byte($haystack, 0, $pos));
    }
8448
8449
    /**
     * Find the byte position of the last occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset
        return \mb_strrpos($haystack, $needle, $offset, 'CP850');
    }
8483
8484
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str    <p>The input string.</p>
     * @param string $mask   <p>The mask of chars</p>
     * @param int    $offset [optional] <p>If set (together with / or $length), only that part of $str is examined.</p>
     * @param int    $length [optional] <p>See $offset.</p>
     *
     * @return int The length of the leading run of mask characters, 0 if there is none
     *             or if the (sliced) string or the mask is empty.
     */
    public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
    {
        if ($offset || $length !== null) {
            // a "false" result from substr() is treated like an empty string
            $str = (string) self::substr($str, $offset, $length);
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // use a dedicated array for the matches instead of overwriting the input string by reference
        $matches = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
            // no leading run (0) or regex error (false)
            return 0;
        }

        // strlen() may report "false"; the declared return type is "int"
        return (int) self::strlen($matches[0]);
    }
8511
8512
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (also if haystack or needle is empty)
     */
    public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strstr($haystack, $needle, $before_needle);
            // a "false" result is not final here, the remaining fallbacks are still tried
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack) && self::is_ascii($needle)) {
            return self::strstr_in_byte($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // $match[1] captures (non-greedy) everything in front of the first occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // strlen() may report "false"; never hand that over as the (int) offset of substr()
        $prefixLength = self::strlen($match[1]);
        if ($prefixLength === false) {
            return false;
        }

        return self::substr($haystack, $prefixLength);
    }
8615
8616
    /**
     * Byte-wise variant of "strstr": finds the first occurrence of a string within another.
     *
     * @param string $haystack      <p>The string that is searched.</p>
     * @param string $needle        <p>The string to look for inside the haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return the haystack from its beginning up to the
     *                              first occurrence of the needle<br>
     *                              <b>false</b>: return the haystack from the first occurrence of
     *                              the needle to its end
     *                              </p>
     *
     * @return false|string the requested portion of the haystack,
     *                      or false if needle is not found or one of the arguments is empty
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        // "mb_" is available if overload is used; "CP850" is passed as an 8-bit charset
        $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

        return $overloadActive
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
8655
8656
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first passed through the case-fix helper and afterwards
     * converted to lowercase or uppercase, depending on $lower.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        $folded = self::fixStrCaseHelper($str, $lower, $full);

        // only a strict "true" selects the lowercase variant
        return $lower === true
            ? self::strtolower($folded, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($folded, $encoding, $cleanUtf8, $lang);
    }
8694
8695
    /**
     * Make a string lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all alphabetic characters converted to lowercase
     */
    public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before the conversion
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Lower';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // unknown language-specific transliterator: use the generic one
                    $langCode = 'Any-Lower';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($langCode, $str);
                // "transliterator_transliterate()" returns false on failure, which must not
                // leave this method because of the declared "string" return type
                if ($transliterated !== false) {
                    return $transliterated;
                }

                \trigger_error('UTF8::strtolower() transliteration failed for language: ' . $lang, \E_USER_WARNING);
            } else {
                \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
8755
8756
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all alphabetic characters converted to uppercase
     */
    public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove non UTF-8 chars before the conversion
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        if ($lang !== null) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    // intl is available in this branch, only the language-specific transliterator is missing
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // unknown language-specific transliterator: use the generic one
                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($langCode, $str);
                // "transliterator_transliterate()" returns false on failure, which must not
                // leave this method because of the declared "string" return type
                if ($transliterated !== false) {
                    return $transliterated;
                }

                \trigger_error('UTF8::strtoupper() transliteration failed for language: ' . $lang, \E_USER_WARNING);
            } else {
                \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
8816
8817
    /**
     * Translate characters or replace sub-strings.
     *
     * Three call styles are handled:
     * - $from and $to given: both are split via str_split(), cut to the same number of
     *   elements and used as a "from => to" map for the native strtr()
     * - only $from given as array: it is used directly as replacement map
     * - only $from given as string: every occurrence of it is removed from $str
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   <p>The string being translated to.</p>
     *
     * @return string
     *                A copy of str with the replacements applied; $str itself if $from is identical to $to
     */
    public static function strtr(string $str, $from, $to = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // identical source and target: nothing would change
        if ($from === $to) {
            return $str;
        }

        // "\INF" is the marker for "no $to was passed"
        if ($to !== \INF) {
            $fromParts = self::str_split($from);
            $toParts = self::str_split($to);

            // only as many pairs as the shorter list provides
            $pairCount = \min(\count($fromParts), \count($toParts));

            $from = \array_combine(
                \array_slice($fromParts, 0, $pairCount),
                \array_slice($toParts, 0, $pairCount)
            );
        }

        return \is_string($from)
            ? \str_replace($from, '', $str)
            : \strtr($str, $from);
    }
8861
8862
    /**
     * Return the width of a string.
     *
     * With mbstring the result comes from "mb_strwidth()"; otherwise every character
     * matched by the wide-character pattern below counts twice and every other
     * character counts once.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
            self::checkForSupport();
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and let preg_replace() count them
        $widePattern = '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u';
        $wideCount = 0;
        $remaining = (string) \preg_replace($widePattern, '', $str, -1, $wideCount);

        // wide characters count twice, the remaining ones once
        return ($wideCount << 1) + self::strlen($remaining, 'UTF-8');
    }
8912
8913
    /**
     * Get part of a string.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string $str       <p>The string being checked.</p>
     * @param int    $offset    <p>The first position used in str.</p>
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>An empty string is returned for an empty
     *                      <i>str</i>, for a <i>length</i> of 0 and (in the non-mbstring fallback) if
     *                      <i>offset</i> lies at or beyond the end of <i>str</i>. <b>FALSE</b> is returned
     *                      if the fallback cannot determine the string length; a native fallback call
     *                      may also hand back its own <b>FALSE</b>.
     */
    public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return '';
        }

        // Empty string
        if ($length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // Whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return self::substr_in_byte($str, $offset, $length);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // "2147483647" fakes "until the end of the string" without the need of a strlen() call
            $return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding);
            if ($return !== false) {
                return $return;
            }
        }

        // otherwise we need the string-length and can't fake it via "2147483647"
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // Empty string
        // ($length === 0 was already handled above, so "no length" means null here)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // Impossible
        if ($offset && $offset > $str_length) {
            // "false" is the php native return type here,
            //  but we optimized this for performance ... see "2147483647" instead of "strlen"
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // NOTE(review): no encoding is passed here, so iconv falls back to its configured
            // internal encoding - confirm that this is intended for non UTF-8 input
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
9069
9070
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * If an offset or a length is given, $str1 is cut to that part first and $str2 is
     * cut to the same number of characters before both are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
    {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            // a "false" result from substr() is treated like an empty string
            $str1 = (string) self::substr($str1, $offset, $length);

            // strlen() may report "false", but substr() only accepts an integer (or null) as length
            $str1Length = (int) self::strlen($str1);

            $str2 = (string) self::substr($str2, 0, $str1Length);
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2);
        }

        return self::strcmp($str1, $str2);
    }
9114
9115
    /**
     * Count the number of substring occurrences.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack  <p>The string to search in.</p>
     * @param string $needle    <p>The substring to search for.</p>
     * @param int    $offset    [optional] <p>The offset where to start counting.</p>
     * @param int    $length    [optional] <p>
     *                          The maximum length after the specified offset to search for the
     *                          substring.
     *                          </p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int The number of occurrences, or <strong>false</strong> if haystack or needle
     *                   is empty, if the haystack length cannot be determined, or for the
     *                   offset/length combination rejected below on PHP &lt; 7.1.
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                // measure the haystack in the same encoding that is used for the substr() call below
                $lengthTmp = self::strlen($haystack, $encoding);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            // only the requested part of the haystack is searched;
            // a "false" result from substr() is treated like an empty string
            $haystack = (string) self::substr($haystack, $offset, $length, $encoding);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the regex matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
9206
9207
    /**
     * Count the number of substring occurrences, working on raw bytes.
     *
     * @param string   $haystack <p>The string being checked.</p>
     * @param string   $needle   <p>The string being found.</p>
     * @param int      $offset   [optional] <p>The (byte) offset where to start counting.</p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length (in bytes) after the specified offset to search for the
     *                           substring. <strong>null</strong> means "up to the end of the haystack".
     *                           </p>
     *
     * @return false|int
     *                   The number of times the needle substring occurs in the haystack string,
     *                   <strong>0</strong> if the haystack or the needle is empty and
     *                   <strong>false</strong> for an invalid offset / length combination
     */
    public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
    {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it with an 8-BIT encoding ("CP850")
            // to keep counting bytes instead of characters.

            if ($offset || $length !== null) {
                if ($length === null) {
                    // byte length of the haystack (a character count would cut multi-byte input short)
                    $length = (int) \mb_strlen($haystack, 'CP850');
                }

                if (
                    $length !== 0
                    &&
                    $offset !== 0
                    &&
                    ($length + $offset) <= 0
                    &&
                    Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
                ) {
                    return false;
                }

                // cut the haystack down to the requested window, "false" becomes an empty string
                $haystack = (string) self::substr_in_byte($haystack, $offset, $length);
            }

            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            // do not hand "null" to the native function: before PHP 8 the 4th parameter is not nullable
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
9280
9281
    /**
     * Counts how often $substring occurs in $str.
     * The comparison is case-sensitive unless $caseSensitive is set to false.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
    {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // only a fallback to prevent BC in the api:
        // a non-boolean third argument is taken over as the encoding
        if (\is_bool($caseSensitive) === false) {
            $encoding = (string) $caseSensitive;
        }

        $searchIn = $str;
        $searchFor = $substring;

        if (!$caseSensitive) {
            // fold both sides the same way, so that the comparison ignores the case
            $searchIn = self::strtocasefold($str, true, false, $encoding, null, false);
            $searchFor = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        $count = self::substr_count($searchIn, $searchFor, 0, null, $encoding);

        return (int) $count;
    }
9311
9312
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                The haystack without the prefix, or the unchanged haystack
     *                if it does not start with the needle
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // "self::strlen()" can return false, but "self::substr()" needs an integer offset:
        // a failed length check falls back to offset 0 and therefore keeps the haystack as it is
        $needleLength = (int) self::strlen($needle);

        // "self::substr()" can return false, which becomes an empty string here
        return (string) self::substr($haystack, $needleLength);
    }
9340
9341
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first (byte) position used in str.</p>
     * @param int|null $length [optional] <p>
     *                         The maximum length (in bytes) of the returned string,
     *                         <strong>null</strong> means "up to the end of the string".
     *                         </p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p>
     *                      <p>The result of the underlying "substr()" / "mb_substr()" call is returned
     *                      unchanged, so <b>FALSE</b> is possible where the native function reports it
     *                      (e.g. if <i>str</i> is shorter than <i>offset</i>).</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // Empty input or empty result
        if ($str === '' || $length === 0) {
            return '';
        }

        // Whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            // "mb_substr()" accepts null as length and then extracts everything up to the end
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            // leave the length out instead of passing an artificial upper bound
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
9380
9381
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                The haystack without the suffix, or the unchanged haystack
     *                if it does not end with the needle
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keepLength = self::strlen($haystack) - self::strlen($needle);
        $withoutSuffix = self::substr($haystack, 0, $keepLength);

        return $withoutSuffix === false ? '' : (string) $withoutSuffix;
    }
9409
9410
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                The haystack without the prefix, or the unchanged haystack
     *                if it does not start with the needle
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // "self::strlen()" can return false, but "self::substr()" needs an integer offset:
        // a failed length check falls back to offset 0 and therefore keeps the haystack as it is
        $needleLength = (int) self::strlen($needle);

        // "self::substr()" can return false, which becomes an empty string here
        return (string) self::substr($haystack, $needleLength);
    }
9438
9439
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.<br><br>
     *                                     Note: if $str is an array and no length is given, a length of 0
     *                                     is used for every element.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace($str, $replacement, $offset, $length = null, string $encoding = 'UTF-8')
    {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement: one entry per input string
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries are replaced by 0
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: "null" entries become 0, other non-integer entries become $num
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    if ($valueTmpV2 !== null) {
                        $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                    } else {
                        $valueTmpV2 = 0;
                    }
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, the requested encoding is handed over to every element
            $replaceSingle = static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
            };

            return \array_map($replaceSingle, $str, $replacement, $offset, $length);
        }

        // a single input string can only use the first replacement
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if ($str === '') {
            return $replacement;
        }

        // for pure ASCII input the native (byte based) function can be used directly
        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // "self::strlen()" can return false, the calculations below need an integer
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing / too large length into a usable one
            if ($length < 0) {
                $length = \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // "self::substr()" can return false, which is treated as an empty part
            $head = (string) self::substr($str, 0, $offset, $encoding);
            $tail = (string) self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);

            return $head . $replacement . $tail;
        }

        // fallback without mbstring: split both strings into characters and splice the arrays
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
9572
9573
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                The haystack without the suffix, or the unchanged haystack
     *                if it does not end with the needle
     */
    public static function substr_right(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        if (self::str_ends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keepLength = self::strlen($haystack) - self::strlen($needle);
        $withoutSuffix = self::substr($haystack, 0, $keepLength);

        return $withoutSuffix === false ? '' : (string) $withoutSuffix;
    }
9601
9602
    /**
     * Returns a case swapped version of the string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $lower = self::strtolower($str, $encoding);
        $upper = self::strtoupper($str, $encoding);

        // Byte-wise XOR: where the lower- and upper-case version differ, the bytes of the other case remain.
        // NOTE: PHP cuts the XOR result to the shortest operand, so this relies on the lower- and
        // upper-case version having the same byte length as the input.
        return (string) ($lower ^ $upper ^ $str);
    }
9629
9630
    /**
     * Checks whether "mbstring" or "iconv" functions exist although the extension itself is not loaded,
     * which is the case if they are provided by a polyfill.
     *
     * @return bool
     *              <strong>true</strong> if "mb_strlen()" exists without the "mbstring" extension or
     *              "iconv()" exists without the "iconv" extension, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
9653
9654
    /**
     * Replaces every tab character in the string with a run of spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces used for one tab. Default: 4</p>
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $spaces, $str);
    }
9664
9665
    /**
     * Title-cases the string by delegating to "UTF8::str_titleize()":
     * the first character of each word becomes uppercase and all other chars lowercase.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        // the two encodings checked here are used as they are, everything else gets normalized
        if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return self::str_titleize(
            $str,
            null,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength,
            false
        );
    }
9685
9686
    /**
     * Alias for "UTF8::to_ascii()": all arguments are passed through unchanged.
     *
     * @see        UTF8::to_ascii()
     *
     * @param string $str
     * @param string $subst_chr
     * @param bool   $strict
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
9703
9704
    /**
     * Alias for "UTF8::to_iso8859()": the argument is passed through unchanged.
     *
     * @see        UTF8::to_iso8859()
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
9719
9720
    /**
     * Alias for "UTF8::to_latin1()": the argument is passed through unchanged.
     *
     * @see        UTF8::to_latin1()
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
9735
9736
    /**
     * Alias for "UTF8::to_utf8()": the argument is passed through unchanged.
     *
     * @see        UTF8::to_utf8()
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
9751
9752
    /**
     * Convert a string into ASCII.
     *
     * The string is returned as it is if it only contains ASCII (before or after "UTF8::clean()").
     * Otherwise every non-ASCII character is decoded to its code point and replaced via a
     * lookup table, characters without a table entry are replaced by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // lookup tables, kept between calls and indexed by "bank" (code point >> 8)
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if ($strict === true) {
            if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
                self::checkForSupport();
            }

            if (self::$SUPPORT['intl'] === true) {
                // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

                // "transliterator_transliterate()" returns false on failure:
                // keep the cleaned string in that case and continue with the lookup tables
                if ($transliterated !== false) {
                    $str = $transliterated;

                    // check again, if we only have ASCII, now ...
                    if (self::is_ascii($str) === true) {
                        return $str;
                    }
                }
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];
        foreach ($chars as &$c) {
            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            // decode the code point from the lead byte + continuation bytes,
            // reset for every character so that no value of a previous character is re-used
            $ord = null;

            if ($ordC0 >= 192 && $ordC0 <= 223) {
                // 2 bytes
                $ord = ($ordC0 - 192) * 64
                       + (self::$ORD[$c[1]] - 128);
            } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
                // 3 bytes
                $ord = ($ordC0 - 224) * 4096
                       + (self::$ORD[$c[1]] - 128) * 64
                       + (self::$ORD[$c[2]] - 128);
            } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
                // 4 bytes
                $ord = ($ordC0 - 240) * 262144
                       + (self::$ORD[$c[1]] - 128) * 4096
                       + (self::$ORD[$c[2]] - 128) * 64
                       + (self::$ORD[$c[3]] - 128);
            } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
                // 5 bytes
                $ord = ($ordC0 - 248) * 16777216
                       + (self::$ORD[$c[1]] - 128) * 262144
                       + (self::$ORD[$c[2]] - 128) * 4096
                       + (self::$ORD[$c[3]] - 128) * 64
                       + (self::$ORD[$c[4]] - 128);
            } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
                // 6 bytes
                $ord = ($ordC0 - 252) * 1073741824
                       + (self::$ORD[$c[1]] - 128) * 16777216
                       + (self::$ORD[$c[2]] - 128) * 262144
                       + (self::$ORD[$c[3]] - 128) * 4096
                       + (self::$ORD[$c[4]] - 128) * 64
                       + (self::$ORD[$c[5]] - 128);
            }

            // no valid lead byte (128-191, 254, 255): the character cannot be decoded
            if ($ord === null) {
                $c = $unknown;

                continue;
            }

            // load the lookup table for this bank on first use, a missing table is cached as empty
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            // position of the character inside of its bank
            $newchar = $ord & 255;

            $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
        }
        unset($c);

        return \implode('', $chars);
    }
9912
9913
    /**
     * Converts a value into a boolean, based on its string representation.
     *
     * "true", "1", "on", "yes" (case insensitive) are true, "false", "0", "off", "no" and the empty
     * string are false. Other numeric strings are true if they are greater than zero, and
     * everything else is true if it is not empty (and not "0") after trimming.
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // init
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $key = \strtolower($value);

        if (\in_array($key, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($key, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        $trimmed = self::trim($value);

        return (bool) $trimmed;
    }
9952
9953
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Every character that is not a letter (a-z, A-Z), a digit, a dot, a hyphen,
     * whitespace or the fallback char is removed. Runs of whitespace are then
     * replaced by the fallback char, repeated fallback chars are collapsed and
     * the fallback char is trimmed from both ends of the result.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate <p>No transliteration is done by default; set to true to run
     *                                  "UTF8::str_transliterate()" on the input first.</p>
     * @param string $fallback_char     <p>Replacement for whitespace; may be empty, in which case
     *                                  whitespace is simply removed.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        if ($fallback_char === '') {
            // An empty fallback would produce the invalid character class "[]",
            // so only strip the unsafe characters and the whitespace here.
            return (string) \preg_replace(
                [
                    '/[^\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
                    '/[\s]+/',              // 2) remove spaces
                ],
                '',
                $string
            );
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $string = (string) \preg_replace(
            [
                '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
                '/[\s]+/',                                            // 2) convert spaces to $fallback_char
                '/[' . $fallback_char_escaped . ']+/',                // 3) remove double $fallback_char's
            ],
            [
                '',
                $fallback_char,
                $fallback_char,
            ],
            $string
        );

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
9988
9989
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept);
     * scalar input is cast to string and passed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) !== true) {
            $string = (string) $str;

            return $string === '' ? '' : self::utf8_decode($string);
        }

        foreach ($str as $key => $value) {
            $str[$key] = self::to_iso8859($value);
        }

        return $str;
    }
10013
10014
    /**
     * Alias of "UTF8::to_iso8859()": convert a string (or an array of strings) to Latin-1.
     *
     * @see UTF8::to_iso8859()
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_latin1($str)
    {
        $latin1 = self::to_iso8859($str);

        return $latin1;
    }
10027
10028
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>Byte sequences that already look like 2-, 3- or 4-byte UTF-8 are copied unchanged.</li>
     * <li>Every other byte >= 0x80 is passed through "UTF8::to_utf8_convert_helper()"; the input is
     * assumed to be either WINDOWS-1252 or ISO-8859 in that case.</li>
     * <li>Unicode escape sequences ("\uXXXX") are decoded, and html-entities too if requested.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        if (\is_array($str) === true) {
            foreach ($str as $k => $v) {
                $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $max = self::strlen_in_byte($str);
        $buf = '';

        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $max; ++$i) {
            $c1 = $str[$i];

            if ($c1 < "\x80") { // 7-bit byte - it doesn't need conversion
                $buf .= $c1;

                continue;
            }

            if ($c1 <= "\xBF") { // continuation byte without a lead byte - needs conversion
                $buf .= self::to_utf8_convert_helper($c1);

                continue;
            }

            // the lead byte tells how long the UTF-8 sequence would have to be
            if ($c1 <= "\xDF") {
                $seqLen = 2;
            } elseif ($c1 <= "\xEF") {
                $seqLen = 3;
            } elseif ($c1 <= "\xF7") {
                $seqLen = 4;
            } else {
                $seqLen = 0; // doesn't look like UTF8, but should be converted
            }

            // a sequence that would run past the end of the string can't be valid
            $seq = $c1;
            $looksLikeUtf8 = $seqLen > 0 && $i + $seqLen <= $max;
            for ($k = 1; $looksLikeUtf8 && $k < $seqLen; ++$k) {
                $cN = $str[$i + $k];
                if ($cN >= "\x80" && $cN <= "\xBF") {
                    $seq .= $cN;
                } else {
                    $looksLikeUtf8 = false;
                }
            }

            if ($looksLikeUtf8) { // yeah, almost sure it's UTF8 already
                $buf .= $seq;
                $i += $seqLen - 1;
            } else { // not valid UTF8 - convert only the lead byte, the next bytes are re-examined
                $buf .= self::to_utf8_convert_helper($c1);
            }
        }

        // decode unicode escape sequences
        $decoded = \preg_replace_callback(
            '/\\\\u([0-9a-f]{4})/i',
            static function ($match) {
                // always fallback via symfony polyfill
                return \mb_convert_encoding(\pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
            },
            $buf
        );
        // "preg_replace_callback()" returns null on a PCRE error - keep the converted string in that case
        if ($decoded !== null) {
            $buf = $decoded;
        }

        // decode html-entities
        if ($decodeHtmlEntityToUtf8 === true) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
10132
10133
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We could only use the original function if the string / chars were 7-bit only,
     * but the check for ASCII (7-bit) costs more time than we could save here.
     *
     * Without $chars, all characters of the unicode categories "Z" (separators) and
     * "C" (control & co.) are stripped.
     *
     * @param string $str   <p>The string to be trimmed</p>
     * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', $chars = \INF): string
    {
        if ($str === '') {
            return '';
        }

        // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
        // The string "0" is falsy in PHP, but it is a perfectly valid character list.
        if ($chars === \INF || (!$chars && $chars !== '0')) {
            $pattern = '^[\pZ\pC]+|[\pZ\pC]+$';
        } else {
            $quotedChars = \preg_quote((string) $chars, '/');
            $pattern = '^[' . $quotedChars . ']+|[' . $quotedChars . ']+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
10162
10163
    /**
     * Makes string's first char uppercase.
     *
     * The string is split into its first character and the rest; only the first
     * character is passed through "UTF8::strtoupper()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
    {
        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before the string is split into characters
            $str = self::clean($str);
        }

        // everything after the first character (false is treated like "nothing left")
        $tail = self::substr($str, 1, null, $encoding);

        $head = (string) self::substr($str, 0, 1, $encoding);
        $upperHead = self::strtoupper($head, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $upperHead . ($tail === false ? '' : $tail);
    }
10197
10198
    /**
     * Alias of "UTF8::ucfirst()": uppercase the first character of the string.
     *
     * @see UTF8::ucfirst()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
10213
10214
    /**
     * Uppercase for all words in the string.
     *
     * Plain ASCII input without exceptions / charlist is handed to PHP's own "ucwords()".
     * Otherwise the string is split via "UTF8::str_to_words()" and every non-empty part
     * that is not listed in $exceptions gets its first character uppercased.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        // compare against '' explicitly: the string "0" is falsy, but it is valid input
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before the string is split into words
            $str = self::clean($str);
        }

        // the native function knows neither exceptions nor an additional charlist
        $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $newWords = [];

        $useExceptions = \count($exceptions) > 0;

        foreach ($words as $word) {
            // skip empty parts only - a word like "0" must stay in the result
            if ((string) $word === '') {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }

            $newWords[] = $word;
        }

        return \implode('', $newWords);
    }
10278
10279
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * Each pass runs "to_utf8()" -> "html_entity_decode()" -> "\urldecode()" -> "fix_simple_utf8()".
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible (until a pass changes nothing).</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // rewrite "%uXXXX" escapes as hexadecimal html-entities, so that the loop below decodes them
        $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
        if (\preg_match($unicodeEscapePattern, $str)) {
            $str = (string) \preg_replace($unicodeEscapePattern, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        while (true) {
            $previous = $str;

            $asUtf8 = self::to_utf8($str);
            $withoutEntities = self::html_entity_decode($asUtf8, $flags);
            $str = self::fix_simple_utf8(\urldecode($withoutEntities));

            // stop after one pass, or as soon as a pass leaves the string unchanged
            if ($multi_decode !== true || $previous === $str) {
                break;
            }
        }

        return $str;
    }
10326
10327
    /**
10328
     * Return an array with "urlencoded"-win1252 -> UTF-8
10329
     *
10330
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
10331
     *
10332
     * @return string[]
10333
     */
10334
    public static function urldecode_fix_win1252_chars(): array
10335
    {
10336
        return [
10337 2
            '%20' => ' ',
10338
            '%21' => '!',
10339
            '%22' => '"',
10340
            '%23' => '#',
10341
            '%24' => '$',
10342
            '%25' => '%',
10343
            '%26' => '&',
10344
            '%27' => "'",
10345
            '%28' => '(',
10346
            '%29' => ')',
10347
            '%2A' => '*',
10348
            '%2B' => '+',
10349
            '%2C' => ',',
10350
            '%2D' => '-',
10351
            '%2E' => '.',
10352
            '%2F' => '/',
10353
            '%30' => '0',
10354
            '%31' => '1',
10355
            '%32' => '2',
10356
            '%33' => '3',
10357
            '%34' => '4',
10358
            '%35' => '5',
10359
            '%36' => '6',
10360
            '%37' => '7',
10361
            '%38' => '8',
10362
            '%39' => '9',
10363
            '%3A' => ':',
10364
            '%3B' => ';',
10365
            '%3C' => '<',
10366
            '%3D' => '=',
10367
            '%3E' => '>',
10368
            '%3F' => '?',
10369
            '%40' => '@',
10370
            '%41' => 'A',
10371
            '%42' => 'B',
10372
            '%43' => 'C',
10373
            '%44' => 'D',
10374
            '%45' => 'E',
10375
            '%46' => 'F',
10376
            '%47' => 'G',
10377
            '%48' => 'H',
10378
            '%49' => 'I',
10379
            '%4A' => 'J',
10380
            '%4B' => 'K',
10381
            '%4C' => 'L',
10382
            '%4D' => 'M',
10383
            '%4E' => 'N',
10384
            '%4F' => 'O',
10385
            '%50' => 'P',
10386
            '%51' => 'Q',
10387
            '%52' => 'R',
10388
            '%53' => 'S',
10389
            '%54' => 'T',
10390
            '%55' => 'U',
10391
            '%56' => 'V',
10392
            '%57' => 'W',
10393
            '%58' => 'X',
10394
            '%59' => 'Y',
10395
            '%5A' => 'Z',
10396
            '%5B' => '[',
10397
            '%5C' => '\\',
10398
            '%5D' => ']',
10399
            '%5E' => '^',
10400
            '%5F' => '_',
10401
            '%60' => '`',
10402
            '%61' => 'a',
10403
            '%62' => 'b',
10404
            '%63' => 'c',
10405
            '%64' => 'd',
10406
            '%65' => 'e',
10407
            '%66' => 'f',
10408
            '%67' => 'g',
10409
            '%68' => 'h',
10410
            '%69' => 'i',
10411
            '%6A' => 'j',
10412
            '%6B' => 'k',
10413
            '%6C' => 'l',
10414
            '%6D' => 'm',
10415
            '%6E' => 'n',
10416
            '%6F' => 'o',
10417
            '%70' => 'p',
10418
            '%71' => 'q',
10419
            '%72' => 'r',
10420
            '%73' => 's',
10421
            '%74' => 't',
10422
            '%75' => 'u',
10423
            '%76' => 'v',
10424
            '%77' => 'w',
10425
            '%78' => 'x',
10426
            '%79' => 'y',
10427
            '%7A' => 'z',
10428
            '%7B' => '{',
10429
            '%7C' => '|',
10430
            '%7D' => '}',
10431
            '%7E' => '~',
10432
            '%7F' => '',
10433
            '%80' => '`',
10434
            '%81' => '',
10435
            '%82' => '‚',
10436
            '%83' => 'ƒ',
10437
            '%84' => '„',
10438
            '%85' => '…',
10439
            '%86' => '†',
10440
            '%87' => '‡',
10441
            '%88' => 'ˆ',
10442
            '%89' => '‰',
10443
            '%8A' => 'Š',
10444
            '%8B' => '‹',
10445
            '%8C' => 'Œ',
10446
            '%8D' => '',
10447
            '%8E' => 'Ž',
10448
            '%8F' => '',
10449
            '%90' => '',
10450
            '%91' => '‘',
10451
            '%92' => '’',
10452
            '%93' => '“',
10453
            '%94' => '”',
10454
            '%95' => '•',
10455
            '%96' => '–',
10456
            '%97' => '—',
10457
            '%98' => '˜',
10458
            '%99' => '™',
10459
            '%9A' => 'š',
10460
            '%9B' => '›',
10461
            '%9C' => 'œ',
10462
            '%9D' => '',
10463
            '%9E' => 'ž',
10464
            '%9F' => 'Ÿ',
10465
            '%A0' => '',
10466
            '%A1' => '¡',
10467
            '%A2' => '¢',
10468
            '%A3' => '£',
10469
            '%A4' => '¤',
10470
            '%A5' => '¥',
10471
            '%A6' => '¦',
10472
            '%A7' => '§',
10473
            '%A8' => '¨',
10474
            '%A9' => '©',
10475
            '%AA' => 'ª',
10476
            '%AB' => '«',
10477
            '%AC' => '¬',
10478
            '%AD' => '',
10479
            '%AE' => '®',
10480
            '%AF' => '¯',
10481
            '%B0' => '°',
10482
            '%B1' => '±',
10483
            '%B2' => '²',
10484
            '%B3' => '³',
10485
            '%B4' => '´',
10486
            '%B5' => 'µ',
10487
            '%B6' => '¶',
10488
            '%B7' => '·',
10489
            '%B8' => '¸',
10490
            '%B9' => '¹',
10491
            '%BA' => 'º',
10492
            '%BB' => '»',
10493
            '%BC' => '¼',
10494
            '%BD' => '½',
10495
            '%BE' => '¾',
10496
            '%BF' => '¿',
10497
            '%C0' => 'À',
10498
            '%C1' => 'Á',
10499
            '%C2' => 'Â',
10500
            '%C3' => 'Ã',
10501
            '%C4' => 'Ä',
10502
            '%C5' => 'Å',
10503
            '%C6' => 'Æ',
10504
            '%C7' => 'Ç',
10505
            '%C8' => 'È',
10506
            '%C9' => 'É',
10507
            '%CA' => 'Ê',
10508
            '%CB' => 'Ë',
10509
            '%CC' => 'Ì',
10510
            '%CD' => 'Í',
10511
            '%CE' => 'Î',
10512
            '%CF' => 'Ï',
10513
            '%D0' => 'Ð',
10514
            '%D1' => 'Ñ',
10515
            '%D2' => 'Ò',
10516
            '%D3' => 'Ó',
10517
            '%D4' => 'Ô',
10518
            '%D5' => 'Õ',
10519
            '%D6' => 'Ö',
10520
            '%D7' => '×',
10521
            '%D8' => 'Ø',
10522
            '%D9' => 'Ù',
10523
            '%DA' => 'Ú',
10524
            '%DB' => 'Û',
10525
            '%DC' => 'Ü',
10526
            '%DD' => 'Ý',
10527
            '%DE' => 'Þ',
10528
            '%DF' => 'ß',
10529
            '%E0' => 'à',
10530
            '%E1' => 'á',
10531
            '%E2' => 'â',
10532
            '%E3' => 'ã',
10533
            '%E4' => 'ä',
10534
            '%E5' => 'å',
10535
            '%E6' => 'æ',
10536
            '%E7' => 'ç',
10537
            '%E8' => 'è',
10538
            '%E9' => 'é',
10539
            '%EA' => 'ê',
10540
            '%EB' => 'ë',
10541
            '%EC' => 'ì',
10542
            '%ED' => 'í',
10543
            '%EE' => 'î',
10544
            '%EF' => 'ï',
10545
            '%F0' => 'ð',
10546
            '%F1' => 'ñ',
10547
            '%F2' => 'ò',
10548
            '%F3' => 'ó',
10549
            '%F4' => 'ô',
10550
            '%F5' => 'õ',
10551
            '%F6' => 'ö',
10552
            '%F7' => '÷',
10553
            '%F8' => 'ø',
10554
            '%F9' => 'ù',
10555
            '%FA' => 'ú',
10556
            '%FB' => 'û',
10557
            '%FC' => 'ü',
10558
            '%FD' => 'ý',
10559
            '%FE' => 'þ',
10560
            '%FF' => 'ÿ',
10561
        ];
10562
    }
10563
10564
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * The string is rewritten in place, byte by byte: 2-byte sequences are decoded to
     * their code point and kept if it is below 256, everything else that is multi-byte
     * (3- and 4-byte sequences, or 2-byte sequences >= 256) becomes "?".
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>If true, the input (after the win1252 table replacement) is returned
     *                              unchanged whenever the decoded result is not shorter (in characters)
     *                              than that input.</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        static $UTF8_TO_WIN1252_KEYS_CACHE = null;
        static $UTF8_TO_WIN1252_VALUES_CACHE = null;

        if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $UTF8_TO_WIN1252_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
            $UTF8_TO_WIN1252_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
        }

        /** @noinspection PhpInternalEntityUsedInspection */
        $str = \str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        // save for later comparison
        $str_backup = $str;
        $len = self::strlen_in_byte($str);

        // lookup tables, loaded on first use (presumably byte <-> ordinal maps - see the data files)
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';
        // $i reads the input bytes, $j writes the output bytes into the same string ($j <= $i)
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // truncated 2-byte sequence at the end of the string: there is no
                        // second byte to read, so treat it like any other undecodable char
                        $str[$j] = $noCharFound;

                        break;
                    }

                    // 2-byte sequence: 5 bits from the lead byte + 6 bits from the next byte
                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than for 3 bytes
                    ++$i;
                // no break
                case "\xE0":
                    // 3-byte (or 4-byte) sequence: never fits into one byte
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    // single byte: copy as it is
                    $str[$j] = $str[$i];
            }
        }

        // only the first $j bytes were (re)written, the rest is left-over input
        $return = self::substr_in_byte($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
10650
10651
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * After PHP's own "utf8_encode()" the entries of the "win1252_to_utf8" table are
     * applied; that step is skipped if the encoded string contains no 0xC2 byte.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        if ($encoded === false) {
            return '';
        }

        if (\strpos($encoded, "\xC2") !== false) {
            // search / replace lists are built once and then re-used for every call
            static $SEARCH_CACHE = null;
            static $REPLACE_CACHE = null;

            if ($SEARCH_CACHE === null) {
                if (self::$WIN1252_TO_UTF8 === null) {
                    self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
                }

                $SEARCH_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
                $REPLACE_CACHE = \array_values(self::$WIN1252_TO_UTF8);
            }

            return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $encoded);
        }

        return $encoded;
    }
10690
10691
    /**
     * Fix utf8-win1252 chars; plain alias of "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
10704
10705
    /**
     * Returns an array with all utf8 whitespace characters.
     *
     * @see   http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author Derek E.
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
10720
10721
    /**
     * Limit the number of words in a string.
     *
     * If the string has more than $limit words, it is cut after the last allowed word,
     * right-trimmed and $strAddOn is appended; otherwise it is returned unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The limit of words as integer; values below 1 give an empty string.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // leading whitespace, followed by up to $limit "word + trailing whitespace" groups
        $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        $wasShortened = isset($found[0]) && self::strlen($str) !== self::strlen($found[0]);
        if ($wasShortened) {
            return self::rtrim($found[0]) . $strAddOn;
        }

        return $str;
    }
10752
10753
    /**
     * Wraps a string to a given number of characters
     *
     * The wrapping itself is done by PHP's byte-based "wordwrap()", but on a one-byte-per-char
     * "mask" of the string (" " for a space, "?" for any other char, "#" for a break); the
     * break positions found in the mask are then applied to the real characters.
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string the given string wrapped at the specified column
     */
    public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
    {
        if ($str === '' || $break === '') {
            return '';
        }

        $mask = '';
        $segments = \explode($break, $str);
        $segmentCount = $segments === false
            ? 0
            : \count($segments);

        // $chars and $mask are built in parallel: one mask byte per entry of $chars
        $chars = [];
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $segmentCount; ++$i) {
            if ($i) {
                // an already existing break in the input
                $chars[] = $break;
                $mask .= '#';
            }

            $segment = $segments[$i];
            unset($segments[$i]);

            if ($segment !== null) {
                foreach (self::split($segment) as $char) {
                    $chars[] = $char;
                    $mask .= $char === ' ' ? ' ' : '?';
                }
            }
        }

        $wrapped = '';
        $charIndex = 0;
        $breakPos = $i = -1;
        $mask = \wordwrap($mask, $width, '#', $cut);

        while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {
            // copy all chars in front of this break
            for (++$i; $i < $breakPos; ++$i) {
                $wrapped .= $chars[$charIndex];
                unset($chars[$charIndex++]);
            }

            // the char at the break position is dropped, if it is a break or a space
            if ($break === $chars[$charIndex] || $chars[$charIndex] === ' ') {
                unset($chars[$charIndex++]);
            }

            $wrapped .= $break;
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $chars);
    }
10820
10821
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split on "\r\n", "\r" and "\n"; every line is wrapped on its own
     * with self::wordwrap() (so the width is counted in characters, not bytes) and
     * the lines are joined with "\n". A "\n" is appended after every line,
     * including the last one.
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width for every single line.</p>
     *
     * @return string the wrapped lines; an empty string if the input could not be split
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            // self::wordwrap() instead of the native wordwrap(): the native function
            // counts bytes and would wrap multi-byte text too early.
            $wrapped .= self::wordwrap($line, $limit) . "\n";
        }

        return $wrapped;
    }
10845
10846
    /**
     * Returns the class-level table of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespaceTable = self::$WHITESPACE;

        return $whitespaceTable;
    }
10855
10856
    /**
     * Adds the specified amount of left and right padding to the given string.
     * The default character used is a space.
     *
     * The target length is the current length of $str (as reported by
     * self::strlen()) plus both paddings; the actual padding is delegated to
     * self::str_pad().
     *
     * @param string $str
     * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
     * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
     * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
     * @param string $encoding [optional] <p>Default: UTF-8</p>
     *
     * @return string string with padding applied
     */
    private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
    {
        // A side that is not padded contributes 0, so one sum covers every case.
        $length = $left + $right + (int) self::strlen($str, $encoding);

        if ($left && !$right) {
            $type = \STR_PAD_LEFT;
        } elseif ($right && !$left) {
            $type = \STR_PAD_RIGHT;
        } else {
            // both sides requested, or no padding at all
            $type = \STR_PAD_BOTH;
        }

        return self::str_pad($str, $length, $padStr, $type, $encoding);
    }
10888
10889
    /**
     * Replaces characters according to the case-folding tables of this class.
     *
     * First the "upper" / "lower" lists of self::$COMMON_CASE_FOLD are applied, then
     * (optionally) the two lists loaded from the "caseFolding_full" data file.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase
     *                             (only a strict <strong>true</strong> switches to lowercase).</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $toLower = ($useLower === true);

        $commonUpper = self::$COMMON_CASE_FOLD['upper'];
        $commonLower = self::$COMMON_CASE_FOLD['lower'];

        $str = $toLower
            ? (string) \str_replace($commonUpper, $commonLower, $str)
            : (string) \str_replace($commonLower, $commonUpper, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // the full table is loaded once per request and kept in a static local
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is replaced by index 1 for lowercase, the other way round otherwise
        if ($toLower) {
            return (string) \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return (string) \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
10930
10931
    /**
     * Loads a data file from "/data/*.php" (relative to this file).
     *
     * The file is included unconditionally; the value of the include expression is
     * handed back to the caller.
     *
     * @param string $file <p>Name of the data file, without directory and without ".php".</p>
     *
     * @return mixed
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        return include $path;
    }
10943
10944
    /**
     * Loads a data file from "/data/*.php" (relative to this file), but only if it exists.
     *
     * @param string $file <p>Name of the data file, without directory and without ".php".</p>
     *
     * @return false|mixed the value of the include expression, or false if the file does not exist
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        return include $path;
    }
10961
10962
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2, so the
     * MB_OVERLOAD_STRING constant may not exist at all; in that case overloading
     * is reported as inactive.
     *
     * @return bool true if the MB_OVERLOAD_STRING bit is set in 'mbstring.func_overload'
     */
    private static function mbstring_overloaded(): bool
    {
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        // ini_get() yields a string (or false): cast explicitly so that the bit test
        // is always done on integers.
        /** @noinspection PhpComposerExtensionStubsInspection */
        $funcOverload = (int) \ini_get('mbstring.func_overload');

        /** @noinspection PhpComposerExtensionStubsInspection */
        return ($funcOverload & \MB_OVERLOAD_STRING) !== 0;
    }
10979
10980
    /**
     * Filters a list of strings and returns the remaining values as a re-indexed list.
     *
     * @param array    $strings
     * @param bool     $removeEmptyValues <p>Drop values that are empty after trim().</p>
     * @param int|null $removeShortValues <p>Drop values whose self::strlen() is lower than
     *                                    or equal to this number; null disables the check.</p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $kept = [];

        foreach ($strings as $candidate) {
            // the length check always runs first
            $isTooShort = $removeShortValues !== null
                          &&
                          self::strlen($candidate) <= $removeShortValues;
            if ($isTooShort) {
                continue;
            }

            $isBlank = $removeEmptyValues === true
                       &&
                       \trim($candidate) === '';
            if (!$isBlank) {
                $kept[] = $candidate;
            }
        }

        return $kept;
    }
11014
11015
    /**
     * Builds a regular-expression fragment that matches any of the given characters.
     *
     * The characters of $s are added to the (already regex-ready) character class
     * content given in $class. The result is either a bracket expression like
     * "[...]" or, if some pieces could not be put into the bracket expression, an
     * alternation "(?:[...]|piece|...)". Results are cached per argument pair.
     *
     * @param string $s     <p>Characters to match literally.</p>
     * @param string $class [optional] <p>Raw character class content to start with (not quoted).</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        // The length prefix keeps the key unambiguous: a plain "$s . $class" would be
        // identical for ('\s', '') and ('', '\s'), which produce different patterns.
        $cacheKey = \strlen($s) . ':' . $s . $class;

        if (isset($RX_CLASS_CACHE[$cacheKey])) {
            return $RX_CLASS_CACHE[$cacheKey];
        }

        // index 0 collects the bracket expression, further entries are alternatives
        $parts = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a leading "-" is a literal inside a bracket expression
                $parts[0] = '-' . $parts[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: may be a regex meta character, so quote it
                $parts[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $parts[0] .= $char;
            } else {
                $parts[] = $char;
            }
        }

        // Compare against '' explicitly: the class content "0" is falsy in PHP but
        // still has to be wrapped in brackets.
        if ($parts[0] !== '') {
            $parts[0] = '[' . $parts[0] . ']';
        }

        if (\count($parts) === 1) {
            $return = $parts[0];
        } else {
            $return = '(?:' . \implode('|', $parts) . ')';
        }

        $RX_CLASS_CACHE[$cacheKey] = $return;

        return $return;
    }
11063
11064
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split at $delimiter and every part is passed through
     * self::str_upper_first(), except for:
     * - parts that are exactly one of the known name particles ("van", "de", ...),
     * - parts that start with one of the known prefixes ("mc", "d'", ...),
     * - if the delimiter is "-": parts that start with one of the name particles.
     *
     * @param string $names     <p>One or more names, separated by $delimiter.</p>
     * @param string $delimiter <p>The separator between the name parts.</p>
     * @param string $encoding  [optional] <p>Encoding used for the prefix comparison.</p>
     *
     * @return string the re-joined names; an empty string if the input could not be split
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        $namesArray = \explode($delimiter, $names);

        if ($namesArray === false) {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // All beginnings that protect a part from being capitalized; the name
        // particles only count as beginnings for hyphenated names.
        $protectedBeginnings = $delimiter === '-'
            ? \array_merge($specialCases['names'], $specialCases['prefixes'])
            : $specialCases['prefixes'];

        // Index-based loop instead of a by-reference foreach: no dangling reference
        // is left behind after the loop.
        foreach ($namesArray as $index => $name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            $isProtected = false;
            foreach ($protectedBeginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $isProtected = true;

                    // one match is enough
                    break;
                }
            }

            if (!$isProtected) {
                $namesArray[$index] = self::str_upper_first($name);
            }
        }

        return \implode($delimiter, $namesArray);
    }
11152
11153
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and every run of non-spacing marks
     * (\p{Mn}) is removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string the folded string; an empty string if the input could not be normalized
     */
    private static function strtonatfold(string $str): string
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports a failure with false, which must not be passed on as a string
        if ($decomposed === false) {
            return '';
        }

        // preg_replace() returns null on error: the cast keeps the declared return type
        return (string) \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
11165
11166
    /**
     * Converts a single input byte into its UTF-8 representation.
     *
     * The lookup tables "ord", "chr" and "win1252_to_utf8" are loaded from the data
     * directory on first use. A byte that has an entry in the Windows-1252 table is
     * replaced by that entry, any other byte is encoded as a two-byte sequence.
     *
     * @param int|string $input <p>The byte to convert; used as key of the "ord" table.</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        // found in Windows-1252 special cases
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
            return '' . self::$WIN1252_TO_UTF8[$ordC1];
        }

        // The bitwise operators below are intentional: with two string operands PHP
        // applies them byte by byte.
        // First byte: "\xC0" combined with the character for (ordinal / 64). The
        // quotient is truncated explicitly because a float must not be used as array key.
        $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
        // Second byte: the lower six bits of the input, marked with "\x80".
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
11199
}
11200