Passed
Push — master ( 17fc93...82a714 )
by Lars
02:32
created

UTF8   F

Complexity

Total Complexity 1752

Size/Duplication

Total Lines 13694
Duplicated Lines 0 %

Test Coverage

Coverage 81.1%

Importance

Changes 108
Bugs 53 Features 4
Metric Value
eloc 4232
c 108
b 53
f 4
dl 0
loc 13694
ccs 3115
cts 3841
cp 0.811
rs 0.8
wmc 1752

273 Methods

Rating   Name   Duplication   Size   Complexity  
A strripos_in_byte() 0 12 4
A is_serialized() 0 11 3
A encode_mimeheader() 0 26 5
F extract_text() 0 175 34
B chr_to_decimal() 0 38 8
A add_bom_to_string() 0 7 2
A count_chars() 0 11 1
A ctype_loaded() 0 3 1
D chr() 0 107 19
A chunk_split() 0 3 1
A css_identifier() 0 55 6
A css_stripe_media_queries() 0 6 1
A clean() 0 47 6
A __construct() 0 2 1
B between() 0 48 8
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A char_at() 0 7 2
A chars() 0 4 1
A chr_size_list() 0 17 3
A checkForSupport() 0 46 4
A collapse_whitespace() 0 7 2
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A array_change_key_case() 0 23 5
A emoji_decode() 0 21 3
A decode_mimeheader() 0 8 3
A emoji_encode() 0 21 3
A decimal_to_chr() 0 5 1
F encode() 0 144 37
A chr_to_hex() 0 11 3
A emoji_from_country_code() 0 17 3
A html_escape() 0 6 1
A file_has_bom() 0 8 2
A filter_input() 0 16 3
A is_bom() 0 10 3
A is_hexadecimal() 0 7 2
A has_uppercase() 0 7 2
A is_utf8() 0 13 4
B get_file_type() 0 60 7
D is_utf16() 0 76 18
C filter() 0 59 14
A is_html() 0 14 2
A is_alpha() 0 7 2
B get_random_string() 0 54 10
A is_uppercase() 0 7 2
A is_ascii() 0 3 1
A is_blank() 0 7 2
A htmlspecialchars() 0 15 3
A has_whitespace() 0 7 2
B is_binary() 0 39 10
A intlChar_loaded() 0 3 1
B is_url() 0 40 7
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
A json_loaded() 0 3 1
A is_lowercase() 0 7 2
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A is_empty() 0 3 1
D is_utf32() 0 76 18
A is_alphanumeric() 0 7 2
A json_decode() 0 17 3
B is_json() 0 26 8
A is_printable() 0 3 1
A int_to_hex() 0 7 2
A has_lowercase() 0 7 2
A json_encode() 0 13 3
A is_base64() 0 17 5
A hex_to_int() 0 14 3
A hex_to_chr() 0 4 1
A htmlentities() 0 28 3
A is_punctuation() 0 3 1
C html_entity_decode() 0 58 13
C file_get_contents() 0 60 12
B html_encode() 0 54 11
A str_substr_after_first_separator() 0 28 6
A max() 0 14 3
B str_camelize() 0 74 10
A parse_str() 0 18 4
A str_contains() 0 15 3
B str_to_lines() 0 28 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A get_unique_string() 0 21 3
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 25 6
A remove_left() 0 28 4
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
C utf8_decode() 0 59 13
A ltrim() 0 26 5
A levenshtein() 0 10 1
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 70 14
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A string() 0 18 4
B str_obfuscate() 0 47 8
D normalize_encoding() 0 147 16
B rxClass() 0 44 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 170 7
A normalize_whitespace() 0 11 1
A str_starts_with() 0 16 4
A str_humanize() 0 15 1
C substr_count_in_byte() 0 55 15
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 26 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 35 5
A getData() 0 6 1
B strtolower() 0 58 10
A urldecode() 0 35 4
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 123 27
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A str_split_array() 0 17 2
A ws() 0 3 1
A str_replace_first() 0 20 2
A fix_utf8() 0 30 4
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A first_char() 0 14 4
A to_boolean() 0 35 5
C stristr() 0 79 17
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A trim() 0 26 5
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 12 3
A str_ireplace() 0 31 5
A str_replace_ending() 0 24 6
A string_has_bom() 0 9 3
B strtr() 0 41 11
B str_contains_all() 0 22 9
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 71 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A rawurldecode() 0 35 4
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 14 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 14
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A str_replace() 0 18 1
A substr_iright() 0 15 4
D getCharDirection() 0 104 117
A replace() 0 11 2
A filter_var_array() 0 16 2
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 134 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
F strrpos() 0 136 31
A remove_right() 0 25 4
A remove_html_breaks() 0 3 1
A showSupport() 0 16 3
A remove_invisible_characters() 0 11 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
A tabs_to_spaces() 0 11 3
A finfo_loaded() 0 3 1
B str_truncate() 0 43 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 16 4
A fits_inside() 0 3 1
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 12 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
B str_snakeize() 0 57 6
A str_sort() 0 16 3
A to_utf8() 0 15 3
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 51 11
A swapCase() 0 17 4
A filter_var() 0 16 2
A substr_ileft() 0 15 4
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
C ord() 0 68 16
B to_string() 0 27 8
A strtonatfold() 0 11 2
C strcspn() 0 48 12
A fix_simple_utf8() 0 32 5
A fixStrCaseHelper() 0 41 5
C str_split_pattern() 0 54 13
D strstr() 0 107 21
A str_isubstr_first() 0 25 4
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 137 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A filter_input_array() 0 16 3
A str_insert() 0 28 4
A getSupportInfo() 0 13 3
A replace_diamond_question_mark() 0 43 5
D is_utf8_string() 0 133 28
B str_delimit() 0 31 8
A to_utf8_convert_helper() 0 28 5
B strtoupper() 0 58 10
A min() 0 14 3
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 28 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 151 33
A str_shuffle() 0 33 6
A strcmp() 0 11 2
A str_word_count() 0 23 5
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * Bom => Byte-Length
14
     *
15
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
16
     *
17
     * @var array<string, int>
18
     */
19
    private static $BOM = [
        // Each key is a byte sequence a string may start with, each value is the
        // number of bytes that sequence occupies.
        //
        // The "as WINDOWS-1252" variants are the BOM bytes after they were wrongly
        // decoded as WINDOWS-1252 and stored as UTF-8 again. They are spelled out as
        // byte escapes where possible, so the table survives editors and tools that
        // silently strip or re-encode BOM-like characters (an empty key would
        // otherwise match the start of every string).
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): the two leading characters stand for the bytes 0x00 0x00 - confirm they are stored as intended.
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        // NOTE(review): the two trailing characters stand for the bytes 0x00 0x00 - confirm they are stored as intended.
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
31
32
    /**
33
     * Numeric code point => UTF-8 Character
34
     *
35
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
36
     *
37
     * @var array<int, string>
38
     */
39
    private static $WHITESPACE = [
40
        // NULL Byte
41
        0 => "\x0",
42
        // Tab
43
        9 => "\x9",
44
        // New Line
45
        10 => "\xa",
46
        // Vertical Tab
47
        11 => "\xb",
48
        // Carriage Return
49
        13 => "\xd",
50
        // Ordinary Space
51
        32 => "\x20",
52
        // NO-BREAK SPACE
53
        160 => "\xc2\xa0",
54
        // OGHAM SPACE MARK
55
        5760 => "\xe1\x9a\x80",
56
        // MONGOLIAN VOWEL SEPARATOR
57
        6158 => "\xe1\xa0\x8e",
58
        // EN QUAD
59
        8192 => "\xe2\x80\x80",
60
        // EM QUAD
61
        8193 => "\xe2\x80\x81",
62
        // EN SPACE
63
        8194 => "\xe2\x80\x82",
64
        // EM SPACE
65
        8195 => "\xe2\x80\x83",
66
        // THREE-PER-EM SPACE
67
        8196 => "\xe2\x80\x84",
68
        // FOUR-PER-EM SPACE
69
        8197 => "\xe2\x80\x85",
70
        // SIX-PER-EM SPACE
71
        8198 => "\xe2\x80\x86",
72
        // FIGURE SPACE
73
        8199 => "\xe2\x80\x87",
74
        // PUNCTUATION SPACE
75
        8200 => "\xe2\x80\x88",
76
        // THIN SPACE
77
        8201 => "\xe2\x80\x89",
78
        // HAIR SPACE
79
        8202 => "\xe2\x80\x8a",
80
        // LINE SEPARATOR
81
        8232 => "\xe2\x80\xa8",
82
        // PARAGRAPH SEPARATOR
83
        8233 => "\xe2\x80\xa9",
84
        // NARROW NO-BREAK SPACE
85
        8239 => "\xe2\x80\xaf",
86
        // MEDIUM MATHEMATICAL SPACE
87
        8287 => "\xe2\x81\x9f",
88
        // HALFWIDTH HANGUL FILLER
89
        65440 => "\xef\xbe\xa0",
90
        // IDEOGRAPHIC SPACE
91
        12288 => "\xe3\x80\x80",
92
    ];
93
94
    /**
95
     * @var array<string, string>
96
     */
97
    private static $WHITESPACE_TABLE = [
98
        'SPACE'                     => "\x20",
99
        'NO-BREAK SPACE'            => "\xc2\xa0",
100
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
101
        'EN QUAD'                   => "\xe2\x80\x80",
102
        'EM QUAD'                   => "\xe2\x80\x81",
103
        'EN SPACE'                  => "\xe2\x80\x82",
104
        'EM SPACE'                  => "\xe2\x80\x83",
105
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
106
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
107
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
108
        'FIGURE SPACE'              => "\xe2\x80\x87",
109
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
110
        'THIN SPACE'                => "\xe2\x80\x89",
111
        'HAIR SPACE'                => "\xe2\x80\x8a",
112
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
113
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
114
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
115
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
116
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
117
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
118
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
119
    ];
120
121
    /**
122
     * @var array
123
     *
124
     * @phpstan-var array{upper: string[], lower: string[]}
125
     */
126
    private static $COMMON_CASE_FOLD = [
127
        'upper' => [
128
            'µ',
129
            'ſ',
130
            "\xCD\x85",
131
            'ς',
132
            'ẞ',
133
            "\xCF\x90",
134
            "\xCF\x91",
135
            "\xCF\x95",
136
            "\xCF\x96",
137
            "\xCF\xB0",
138
            "\xCF\xB1",
139
            "\xCF\xB5",
140
            "\xE1\xBA\x9B",
141
            "\xE1\xBE\xBE",
142
        ],
143
        'lower' => [
144
            'μ',
145
            's',
146
            'ι',
147
            'σ',
148
            'ß',
149
            'β',
150
            'θ',
151
            'φ',
152
            'π',
153
            'κ',
154
            'ρ',
155
            'ε',
156
            "\xE1\xB9\xA1",
157
            'ι',
158
        ],
159
    ];
160
161
    /**
162
     * @var array
163
     *
164
     * @phpstan-var array<string, mixed>
165
     */
166
    private static $SUPPORT = [];
167
168
    /**
169
     * @var string[]|null
170
     *
171
     * @phpstan-var array<string, string>|null
172
     */
173
    private static $BROKEN_UTF8_FIX;
174
175
    /**
176
     * @var string[]|null
177
     *
178
     * @phpstan-var array<int, string>|null
179
     */
180
    private static $WIN1252_TO_UTF8;
181
182
    /**
183
     * @var string[]|null
184
     *
185
     * @phpstan-var array<int ,string>|null
186
     */
187
    private static $INTL_TRANSLITERATOR_LIST;
188
189
    /**
190
     * @var string[]|null
191
     *
192
     * @phpstan-var array<string>|null
193
     */
194
    private static $ENCODINGS;
195
196
    /**
197
     * @var int[]|null
198
     *
199
     * @phpstan-var array<string ,int>|null
200
     */
201
    private static $ORD;
202
203
    /**
204
     * @var string[]|null
205
     *
206
     * @phpstan-var array<string, string>|null
207
     */
208
    private static $EMOJI;
209
210
    /**
211
     * @var string[]|null
212
     *
213
     * @phpstan-var array<string>|null
214
     */
215
    private static $EMOJI_VALUES_CACHE;
216
217
    /**
218
     * @var string[]|null
219
     *
220
     * @phpstan-var array<string>|null
221
     */
222
    private static $EMOJI_KEYS_CACHE;
223
224
    /**
225
     * @var string[]|null
226
     *
227
     * @phpstan-var array<string>|null
228
     */
229
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
230
231
    /**
232
     * @var string[]|null
233
     *
234
     * @phpstan-var array<int, string>|null
235
     */
236
    private static $CHR;
237
238
    /**
239
     * __construct()
240
     */
241 34
    public function __construct()
    {
        // Intentionally empty: there is nothing to initialise on construction.
    }
244
245
    /**
246
     * Return the character at the specified position: $str[1] like functionality.
247
     *
248
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
249
     *
250
     * @param string $str      <p>A UTF-8 string.</p>
251
     * @param int    $pos      <p>The position of character to return.</p>
252
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
253
     *
254
     * @psalm-pure
255
     *
256
     * @return string
257
     *                <p>Single multi-byte character.</p>
258
     */
259 3
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // Nothing can be picked from an empty string or from a negative position.
        if ($pos < 0 || $str === '') {
            return '';
        }

        // The default encoding uses "mb_substr" directly, everything else goes
        // through the encoding-aware substr() of this class.
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
271
272
    /**
273
     * Prepends UTF-8 BOM character to the string and returns the whole string.
274
     *
275
     * INFO: If BOM already existed there, the Input string is returned.
276
     *
277
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
278
     *
279
     * @param string $str <p>The input string.</p>
280
     *
281
     * @psalm-pure
282
     *
283
     * @return string
284
     *                <p>The output string that contains BOM.</p>
285
     */
286 2
    public static function add_bom_to_string(string $str): string
    {
        // A string that already carries a BOM is handed back untouched.
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
294
295
    /**
296
     * Changes all keys in an array.
297
     *
298
     * @param array<string, mixed> $array    <p>The array to work on</p>
299
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
300
     *                                       or <strong>CASE_LOWER</strong> (default)</p>
301
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
302
     *
303
     * @psalm-pure
304
     *
305
     * @return array<string, mixed>
306
     *                  <p>An array with its keys lower- or uppercased.</p>
307
     */
308 2
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Every value other than CASE_UPPER (including invalid ones) means CASE_LOWER.
        $to_upper = $case === \CASE_UPPER;

        $return = [];

        // Iterate by value: the values are only copied into the result, so a
        // by-reference loop would just leave a dangling reference behind.
        foreach ($array as $key => $value) {
            // Keys of list-style arrays are integers; with strict types they have to be
            // cast before they reach the string-only case helpers. PHP turns numeric
            // string keys back into integers on assignment, so such keys stay unchanged.
            $key = $to_upper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            // When two keys collapse into the same cased key, the later entry wins.
            $return[$key] = $value;
        }

        return $return;
    }
332
333
    /**
334
     * Returns the substring between $start and $end, if found, or an empty
335
     * string. An optional offset may be supplied from which to begin the
336
     * search for the start string.
337
     *
338
     * @param string $str
339
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
340
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
341
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
342
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
343
     *
344
     * @psalm-pure
345
     *
346
     * @return string
347
     */
348 16
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // Default encoding: call the "mb_*" functions directly.
        if ($encoding === 'UTF-8') {
            $start_at = \mb_strpos($str, $start, $offset);
            if ($start_at === false) {
                return '';
            }

            // The wanted part begins right behind the start delimiter.
            $inner_from = $start_at + (int) \mb_strlen($start);
            $inner_to = \mb_strpos($str, $end, $inner_from);

            // Either there is no end delimiter, or nothing sits between the two delimiters.
            if ($inner_to === false || $inner_to === $inner_from) {
                return '';
            }

            return (string) \mb_substr($str, $inner_from, $inner_to - $inner_from);
        }

        // Any other encoding: same steps, but via the encoding-aware helpers of this class.
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $start_at = self::strpos($str, $start, $offset, $encoding);
        if ($start_at === false) {
            return '';
        }

        $inner_from = $start_at + (int) self::strlen($start, $encoding);
        $inner_to = self::strpos($str, $end, $inner_from, $encoding);

        if ($inner_to === false || $inner_to === $inner_from) {
            return '';
        }

        $inner_length = $inner_to - $inner_from;

        return (string) self::substr($str, $inner_from, $inner_length, $encoding);
    }
398
399
    /**
400
     * Convert binary into a string.
401
     *
402
     * INFO: opposite to UTF8::str_to_binary()
403
     *
404
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
405
     *
406
     * @param string $bin 1|0
407
     *
408
     * @psalm-pure
409
     *
410
     * @return string
411
     */
412 2
    public static function binary_to_str($bin): string
    {
        // The parameter is untyped: anything that is not a non-empty string yields ''.
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // Only "0" and "1" carry information; other characters are ignored.
        $bits = (string) \preg_replace('/[^01]/', '', $bin);

        // The input is read as one big binary number, so leading zero bits have no
        // value and an all-zero input results in an empty string.
        $bits = \ltrim($bits, '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad to whole bytes and convert byte by byte. Unlike a single
        // base_convert() call this neither loses precision on long inputs nor
        // misplaces the nibbles when the number of hex digits would be odd.
        $missing_bits = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $missing_bits) . $bits;

        $str = '';
        foreach (\str_split($bits, 8) as $byte_bits) {
            $str .= \chr((int) \bindec($byte_bits));
        }

        return $str;
    }
425
426
    /**
427
     * Returns the UTF-8 Byte Order Mark Character.
428
     *
429
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
430
     *
431
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
432
     *
433
     * @psalm-pure
434
     *
435
     * @return string
436
     *                <p>UTF-8 Byte Order Mark.</p>
437
     */
438 4
    public static function bom(): string
    {
        // The three bytes of the UTF-8 byte order mark.
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
442
443
    /**
444
     * @alias of UTF8::chr_map()
445
     *
446
     * @param callable $callback
447
     * @param string   $str
448
     *
449
     * @psalm-pure
450
     *
451
     * @return string[]
452
     *
453
     * @see   UTF8::chr_map()
454
     */
455 2
    public static function callback($callback, string $str): array
    {
        // Same work as chr_map(): run the callback once per character of the string.
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
459
460
    /**
461
     * Returns the character at $index, with indexes starting at 0.
462
     *
463
     * @param string $str      <p>The input string.</p>
464
     * @param int    $index    <p>Position of the character.</p>
465
     * @param string $encoding [optional] <p>Default is UTF-8</p>
466
     *
467
     * @psalm-pure
468
     *
469
     * @return string
470
     *                <p>The character at $index.</p>
471
     */
472 9
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // "mb_substr" is used directly for the default encoding, the encoding-aware
        // substr() of this class for everything else.
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
480
481
    /**
482
     * Returns an array consisting of the characters in the string.
483
     *
484
     * @param string $str <p>The input string.</p>
485
     *
486
     * @psalm-pure
487
     *
488
     * @return string[]
489
     *                  <p>An array of chars.</p>
490
     */
491 4
    public static function chars(string $str): array
    {
        // Called without further arguments, str_split() is expected to deliver a flat list of characters.
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
496
497
    /**
498
     * This method will auto-detect your server environment for UTF-8 support.
499
     *
500
     * @return true|null
501
     *
502
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
503
     */
504 4
    public static function checkForSupport()
    {
        // The detection runs only once; later calls report null and change nothing.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        // Set the marker first, before any of the probes below is executed.
        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // mbstring - http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

        if (self::$SUPPORT['mbstring'] === true) {
            // Switch the "mb_*" and "mb_ereg*" functions to UTF-8.
            \mb_internal_encoding('UTF-8');
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // iconv - http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // intl - http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // IntlChar - http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // ctype - http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // finfo - http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // json - http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // pcre - http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // The polyfill offers "mb_internal_encoding" as well, so set it to UTF-8 there too.
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
551
552
    /**
553
     * Generates a UTF-8 encoded character from the given code point.
554
     *
555
     * INFO: opposite to UTF8::ord()
556
     *
557
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
558
     *
559
     * @param int    $code_point <p>The code point for which to generate a character.</p>
560
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
561
     *
562
     * @psalm-pure
563
     *
564
     * @return string|null
565
     *                     <p>Multi-byte character, returns null on failure or empty input.</p>
566
     */
567 21
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // Memo of already generated characters, keyed by "<code point>_<encoding>".
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // The parameter is not type-hinted, so the integer check is still required.
        // Code points above U+10FFFF do not exist and cannot be encoded as UTF-8.
        if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Single-byte range only (U+0001 - U+007F): here the byte table entry is the
        // UTF-8 character itself. U+0080 already needs two bytes ("\xC2\x80") and
        // must therefore take one of the multi-byte paths below.
        if ($code_point < 0x80) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $chr = \IntlChar::chr($code_point);

            // "IntlChar::chr" reports a failure with null: pass it on instead of
            // handing a non-string to encode().
            if ($chr === null) {
                return null;
            }

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // Build the UTF-8 sequence by hand: a lead byte that announces the length,
        // followed by continuation bytes which carry six bits each.
        if ($code_point <= 0x7FF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
675
676
    /**
677
     * Applies callback to all characters of a string.
678
     *
679
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
680
     *
681
     * @param callable $callback <p>The callback function.</p>
682
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
683
     *
684
     * @psalm-pure
685
     *
686
     * @return string[]
687
     *                  <p>The outcome of the callback, as array.</p>
688
     */
689 2
    public static function chr_map($callback, string $str): array
    {
        // Split into single characters first, then run the callback on each of them.
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
696
697
    /**
698
     * Generates an array of byte length of each character of a Unicode string.
699
     *
700
     * 1 byte => U+0000  - U+007F
701
     * 2 byte => U+0080  - U+07FF
702
     * 3 byte => U+0800  - U+FFFF
703
     * 4 byte => U+10000 - U+10FFFF
704
     *
705
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
706
     *
707
     * @param string $str <p>The original unicode string.</p>
708
     *
709
     * @psalm-pure
710
     *
711
     * @return int[]
712
     *               <p>An array of byte lengths of each character.</p>
713
     */
714 4
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // Choose the byte counter once: with mbstring function overloading active the
        // 8-bit "CP850" charset is used via "mb_strlen" (which is available in that case),
        // otherwise the plain "strlen" does the job.
        $byte_counter = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            }
        : '\strlen';

        return \array_map($byte_counter, self::str_split($str));
    }
732
733
    /**
734
     * Get a decimal code representation of a specific character.
735
     *
736
     * INFO: opposite to UTF8::decimal_to_chr()
737
     *
738
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
739
     *
740
     * @param string $char <p>The input character.</p>
741
     *
742
     * @psalm-pure
743
     *
744
     * @return int
745
     */
746 5
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no first character: without this guard "unpack()" is
        // fed an empty buffer (returns false) and the byte-wise fallback reads offset 0
        // of an empty string.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                // 'V' reads the first 32-bit little-endian value, i.e. the first code point
                $unpacked = \unpack('V', $chr_tmp);
                if ($unpacked !== false) {
                    return $unpacked[1];
                }
            }
        }

        // Fallback: decode the UTF-8 sequence by hand, starting with the lead byte.
        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // The lead byte tells how many bytes belong to the sequence; strip its marker bits.
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // Append the payload bits of every continuation byte.
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
785
786
    /**
787
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
788
     *
789
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
790
     *
791
     * @param int|string $char   <p>The input character</p>
792
     * @param string     $prefix [optional]
793
     *
794
     * @psalm-pure
795
     *
796
     * @return string
797
     *                <p>The code point encoded as U+xxxx.</p>
798
     */
799 2
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the HTML entity of the NUL character is handed on as an empty string
        $input = $char === '&#0;' ? '' : $char;

        $code_point = self::ord((string) $input);

        return self::int_to_hex($code_point, $prefix);
    }
811
812
    /**
813
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
814
     *
815
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
816
     *
817
     * @param string $body         <p>The original string to be split.</p>
818
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
819
     * @param string $end          [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
820
     *
821
     * @psalm-pure
822
     *
823
     * @return string
824
     *                <p>The chunked string.</p>
825
     */
826 4
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        // cut the body into pieces of (at most) $chunk_length characters ...
        $chunks = self::str_split($body, $chunk_length);

        // ... and glue them together again with the line ending in between
        return \implode($end, $chunks);
    }
830
831
    /**
832
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
833
     *
834
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
835
     *
836
     * @param string $str                                     <p>The string to be sanitized.</p>
837
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
838
     *                                                        UTF-BOM.</p>
839
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
840
     *                                                        whitespace.</p>
841
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
842
     *                                                        Word chars e.g.: "…"
843
     *                                                        => "..."</p>
844
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
845
     *                                                        in
846
     *                                                        combination with
847
     *                                                        $normalize_whitespace</p>
848
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
849
     *                                                        question mark e.g.: "�"</p>
850
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
851
     *                                                        invisible characters e.g.: "\0"</p>
852
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove
853
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
854
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
855
     *                                                        </p>
856
     *
857
     * @psalm-pure
858
     *
859
     * @return string
860
     *                <p>A clean UTF-8 encoded string.</p>
861
     */
862 90
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences; the two other alternatives
        // match stray bytes, which are dropped because only "$1" is written back.
        $utf8_only_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($utf8_only_pattern, '$1', $str);

        // The optional steps below run in a fixed order.
        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
910
911
    /**
912
     * Clean-up a string and show only printable UTF-8 chars at the end  + fix UTF-8 encoding.
913
     *
914
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
915
     *
916
     * @param string $str <p>The input string.</p>
917
     *
918
     * @psalm-pure
919
     *
920
     * @return string
921
     */
922 33
    public static function cleanup($str): string
    {
        // the parameter is untyped, so normalize it to a string first
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // Options handed to clean(); invisible characters are removed as well,
        // because that is the default of clean().
        $remove_bom = true;
        $normalize_whitespace = true;
        $normalize_msword = false;
        $keep_non_breaking_space = true;
        $replace_diamond_question_mark = true;

        return self::clean(
            $fixed,
            $remove_bom,
            $normalize_whitespace,
            $normalize_msword,
            $keep_non_breaking_space,
            $replace_diamond_question_mark
        );
    }
948
949
    /**
950
     * Accepts a string or a array of strings and returns an array of Unicode code points.
951
     *
952
     * INFO: opposite to UTF8::string()
953
     *
954
     * EXAMPLE: <code>
955
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
956
     * // ... OR ...
957
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
958
     * </code>
959
     *
960
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
961
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
962
     *                                     default, code points will be returned as integers.</p>
963
     *
964
     * @psalm-pure
965
     *
966
     * @return int[]|string[]
967
     *                        <p>
968
     *                        The array of code points:<br>
969
     *                        int[] for $u_style === false<br>
970
     *                        string[] for $u_style === true<br>
971
     *                        </p>
972
     */
973 12
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // a string is split into its characters, anything else is taken as it is
        $characters = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($characters) || $characters === []) {
            // neither a string nor an array was given, or there is nothing to convert
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $characters);

        if (!$use_u_style) {
            return $code_points;
        }

        // format every integer code point as "U+xxxx"
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1010
1011
    /**
1012
     * Trims the string and replaces consecutive whitespace characters with a
1013
     * single space. This includes tabs and newline characters, as well as
1014
     * multibyte whitespace such as the thin space and ideographic space.
1015
     *
1016
     * @param string $str <p>The input string.</p>
1017
     *
1018
     * @psalm-pure
1019
     *
1020
     * @return string
1021
     *                <p>A string with trimmed $str and condensed whitespace.</p>
1022
     */
1023 13
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: use the regex helper of this class instead
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        // squeeze every run of whitespace into one single space
        $condensed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($condensed);
    }
1031
1032
    /**
1033
     * Returns count of characters used in a string.
1034
     *
1035
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
1036
     *
1037
     * @param string $str                     <p>The input string.</p>
1038
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
1039
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use "mb_" functions.</p>
1040
     *
1041
     * @psalm-pure
1042
     *
1043
     * @return int[]
1044
     *               <p>An associative array of Character as keys and
1045
     *               their count as values.</p>
1046
     */
1047 19
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // one array element per character (chunk length 1)
        $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // character => number of occurrences
        return \array_count_values($characters);
    }
1061
1062
    /**
1063
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
1064
     *
1065
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
1066
     *
1067
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
1068
     *
1069
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
1070
     * @param string[] $filter
1071
     * @param bool     $strip_tags
1072
     * @param bool     $strtolower
1073
     *
1074
     * @psalm-pure
1075
     *
1076
     * @return string
1077
     *
1078
     * @phpstan-param array<string,string> $filter
1079
     */
1080 1
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank input gets an auto-generated identifier, everything else is cleaned.
        $identifier = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $identifier = \strip_tags($identifier);
        }

        if ($strtolower) {
            $identifier = \strtolower($identifier);
        }

        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $placeholder_count = 0;
        if (!isset($filter['__'])) {
            $identifier = \str_replace('__', '##', $identifier, $placeholder_count);
        }

        $identifier = \str_replace(\array_keys($filter), \array_values($filter), $identifier);

        // Turn the temporary placeholder '##' back into '__', but only if the
        // identifier really contained '__' before the filter was applied.
        if ($placeholder_count > 0) {
            $identifier = \str_replace('##', '__', $identifier);
        }

        // Characters kept by the pattern below:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 - U+FFFF
        // Every other character is stripped.
        $identifier = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $identifier = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $identifier);

        return \trim($identifier, '-');
    }
1136
1137
    /**
1138
     * Remove css media-queries.
1139
     *
1140
     * @param string $str
1141
     *
1142
     * @psalm-pure
1143
     *
1144
     * @return string
1145
     */
1146 1
    public static function css_stripe_media_queries(string $str): string
    {
        // matches a whole "@media ... { ... }" block; the "U" flag makes the quantifiers ungreedy
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        return (string) $without_media_queries;
    }
1154
1155
    /**
1156
     * Checks whether ctype is available on the server.
1157
     *
1158
     * @psalm-pure
1159
     *
1160
     * @return bool
1161
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
1162
     *
1163
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
1164
     */
1165
    public static function ctype_loaded(): bool
    {
        // ask PHP itself whether the "ctype" extension is loaded
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1169
1170
    /**
1171
     * Converts an int value into a UTF-8 character.
1172
     *
1173
     * INFO: opposite to UTF8::chr_to_decimal()
1174
     *
1175
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
1176
     *
1177
     * @param int|string $int
1178
     *
1179
     * @phpstan-param int|numeric-string $int
1180
     *
1181
     * @psalm-pure
1182
     *
1183
     * @return string
1184
     */
1185 20
    public static function decimal_to_chr($int): string
    {
        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        //
        // The code point is written as a numeric HTML entity and converted to UTF-8.
        $char = \mb_convert_encoding('&#' . $int . ';', 'UTF-8', 'HTML-ENTITIES');

        // "mb_convert_encoding()" is not guaranteed to hand back a string, but this
        // method is: report a failed conversion as an empty string instead of
        // violating the declared return type.
        return \is_string($char) ? $char : '';
    }
1191
1192
    /**
1193
     * Decodes a MIME header field
1194
     *
1195
     * @param string $str
1196
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1197
     *
1198
     * @psalm-pure
1199
     *
1200
     * @return false|string
1201
     *                      <p>A decoded MIME field on success,
1202
     *                      or false if an error occurs during the decoding.</p>
1203
     */
1204 2
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // the two charsets used most inside this class need no normalization
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        $decoded = \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);

        return $decoded;
    }
1213
1214
    /**
1215
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
1216
     *
1217
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
1218
     *
1219
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
1220
     *
1221
     * @return string
1222
     *                <p>Emoji or empty string on error.</p>
1223
     */
1224 1
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only two ASCII letters form a valid two-letter code. Everything else
        // (empty input, wrong length, digits, multibyte characters) is an error and
        // must yield an empty string; indexing such input bytewise below would
        // produce characters that are no flag at all.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        // 'A' (0x41) is mapped onto 0x1F1E6, 'B' onto 0x1F1E7, and so on.
        $flagOffset = 0x1F1E6;
        $asciiOffset = 0x41;

        $emoji = '';
        foreach ([0, 1] as $index) {
            // both letters are plain ASCII here, so reading one byte is safe
            $code_point = \ord($country_code_iso_3166_1[$index]) - $asciiOffset + $flagOffset;
            $emoji .= (self::chr($code_point) ?? '');
        }

        return $emoji;
    }
1242
1243
    /**
1244
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1245
     *
1246
     * INFO: opposite to UTF8::emoji_encode()
1247
     *
1248
     * EXAMPLE: <code>
1249
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
1250
     * //
1251
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
1252
     * </code>
1253
     *
1254
     * @param string $str                            <p>The input string.</p>
1255
     * @param bool   $use_reversible_string_mappings [optional] <p>
1256
     *                                               When <b>TRUE</b>, we use a reversible string mapping
1257
     *                                               between "emoji_encode" and "emoji_decode".</p>
1258
     *
1259
     * @psalm-pure
1260
     *
1261
     * @return string
1262
     */
1263 9
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // choose the list of encoded names that has to be searched for
        $encoded_names = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // swap every encoded name for the emoji value at the same position
        return (string) \str_replace(
            (array) $encoded_names,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1286
1287
    /**
1288
     * Encode a string with emoji chars into a non-emoji string.
1289
     *
1290
     * INFO: opposite to UTF8::emoji_decode()
1291
     *
1292
     * EXAMPLE: <code>
1293
     * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
1294
     * //
1295
     * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
1296
     * </code>
1297
     *
1298
     * @param string $str                            <p>The input string</p>
1299
     * @param bool   $use_reversible_string_mappings [optional] <p>
1300
     *                                               when <b>TRUE</b>, we use a reversible string mapping
1301
     *                                               between "emoji_encode" and "emoji_decode"</p>
1302
     *
1303
     * @psalm-pure
1304
     *
1305
     * @return string
1306
     */
1307 12
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // choose the list of names the emoji values are replaced with
        $encoded_names = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // swap every emoji value for the encoded name at the same position
        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $encoded_names,
            $str
        );
    }
1330
1331
    /**
1332
     * Encode a string with a new charset-encoding.
1333
     *
1334
     * INFO:  This function will also try to fix broken / double encoding,
1335
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
1336
     *
1337
     * EXAMPLE: <code>
1338
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
1339
     * //
1340
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
1341
     * //
1342
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
1343
     * //
1344
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
1345
     * </code>
1346
     *
1347
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1348
     * @param string $str                           <p>The input string</p>
1349
     * @param bool   $auto_detect_the_from_encoding [optional] <p>Force the new encoding (we try to fix broken / double
1350
     *                                              encoding for UTF-8)<br> otherwise we auto-detect the current
1351
     *                                              string-encoding</p>
1352
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1353
     *                                              A empty string will trigger the autodetect anyway.</p>
1354
     *
1355
     * @psalm-pure
1356
     *
1357
     * @return string
1358
     *
1359
     * @psalm-suppress InvalidReturnStatement
1360
     */
1361 28
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // nothing to convert, or no target given
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // Normalize the encoding names (the two most used ones are skipped as a shortcut).
        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        // Pseudo encodings: JSON, BASE64 and HTML-ENTITIES are handled by dedicated
        // helpers. As a target they return directly; as a source the string is decoded
        // and the source encoding is reset, so it gets detected below.
        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            // NOTE(review): the decoded JSON value is used as a string below - confirm
            // how non-string JSON documents (objects, arrays, null) should be handled.
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // In strict mode "base64_decode()" returns false for invalid input; there is
            // nothing to convert in that case, and false must not be passed on as a string.
            $decoded = \base64_decode($str, true);
            if ($decoded === false) {
                return '';
            }

            $str = $decoded;
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // Detect the source encoding if requested, or if none is known at this point.
        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $from_encoding_auto_detected, $str, "\n\n");

        if ($from_encoding_auto_detected !== false) {
            // a detected encoding wins over the one that was passed in
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        // Conversions between UTF-8, ISO-8859-1 and WINDOWS-1252 use the helpers of this class.
        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            // an empty / falsy result falls through to the iconv attempt below
            if ($str_encoded) {
                \assert(\is_string($str_encoded));

                return $str_encoded;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $return = @\iconv($from_encoding, $to_encoding, $str);
        if ($return !== false) {
            return $return;
        }

        // last resort: hand back the string unconverted
        return $str;
    }
1506
1507
    /**
1508
     * @param string $str
1509
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
1510
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
1511
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
1512
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
1513
     * @param int    $indent            [optional] <p>Set the max length indent.</p>
1514
     *
1515
     * @psalm-pure
1516
     *
1517
     * @return false|string
1518
     *                      <p>An encoded MIME field on success,
1519
     *                      or false if an error occurs during the encoding.</p>
1520
     */
1521 1
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // the two charsets used most inside this class need no normalization
        if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        // (the header field name is left empty, only the value is given)
        return \iconv_mime_encode('', $str, $preferences);
    }
1550
1551
    /**
     * Build a shortened extract of a text, trying to keep the searched string
     * inside the returned window.
     *
     * - Without a search string the text is cut at the space / period position
     *   looked up from offset ($length - 1) and the replacer is appended.
     * - With a search string the window start is derived from the (case-insensitive)
     *   match position, moved back to the last space / period before it, and the
     *   replacer is added on every side where text was skipped.
     *
     * NOTE(review): the cut positions are computed with min(strpos(' '), strpos('.')).
     * When one of the two lookups returns false the result is cast to 0 and the
     * text is then not cut on that side - confirm whether this is intended.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the native "mb_*" functions are used directly for UTF-8,
        // everything else goes through the encoding-aware wrappers
        $is_utf8 = $encoding === 'UTF-8';

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // ---------------------------------------------------------------
        // no search string: simply shorten the text from the left
        // ---------------------------------------------------------------

        if ($search === '') {
            $scan_from = 0;
            if ($length > 0) {
                $char_count = $is_utf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);

                // never start scanning behind the end of the string
                $scan_from = \min($length - 1, $char_count);
            }

            $cut_at = $is_utf8
                ? (int) \min(
                    \mb_strpos($str, ' ', $scan_from),
                    \mb_strpos($str, '.', $scan_from)
                )
                : (int) \min(
                    self::strpos($str, ' ', $scan_from, $encoding),
                    self::strpos($str, '.', $scan_from, $encoding)
                );

            if (!$cut_at) {
                return $str;
            }

            $head = $is_utf8
                ? \mb_substr($str, 0, $cut_at)
                : self::substr($str, 0, $cut_at, $encoding);

            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trim_chars) . $replacer_for_skipped_text;
        }

        // ---------------------------------------------------------------
        // search string given: center the window around the match
        // ---------------------------------------------------------------

        if ($is_utf8) {
            $match_at = (int) \mb_stripos($str, $search);
            $search_length = (int) \mb_strlen($search);
        } else {
            $match_at = (int) self::stripos($str, $search, 0, $encoding);
            $search_length = (int) self::strlen($search, $encoding);
        }
        $window_start = (int) ($match_at - $length / 2 + $search_length / 2);

        // move the window start back to the last space / period before it
        $left_edge = 0;
        if ($window_start > 0) {
            $prefix = $is_utf8
                ? \mb_substr($str, 0, $window_start)
                : self::substr($str, 0, $window_start, $encoding);

            if ($prefix !== false) {
                $left_edge = $is_utf8
                    ? (int) \max(
                        \mb_strrpos($prefix, ' '),
                        \mb_strrpos($prefix, '.')
                    )
                    : (int) \max(
                        self::strrpos($prefix, ' ', 0, $encoding),
                        self::strrpos($prefix, '.', 0, $encoding)
                    );
            }
        }

        $str_length = (int) self::strlen($str, $encoding);

        if ($match_at && $window_start > 0) {
            // text was skipped on the left side, maybe also on the right side
            $scan_from = \min($left_edge + $length - 1, $str_length);

            $span = (
                $is_utf8
                    ? (int) \min(
                        \mb_strpos($str, ' ', $scan_from),
                        \mb_strpos($str, '.', $scan_from)
                    )
                    : (int) \min(
                        self::strpos($str, ' ', $scan_from, $encoding),
                        self::strpos($str, '.', $scan_from, $encoding)
                    )
            ) - $left_edge;

            if ($span > 0) {
                $middle = $is_utf8
                    ? \mb_substr($str, $left_edge, $span)
                    : self::substr($str, $left_edge, $span, $encoding);

                if ($middle === false) {
                    return '';
                }

                return $replacer_for_skipped_text . \trim($middle, $trim_chars) . $replacer_for_skipped_text;
            }

            // no usable end position: take everything up to the end of the string
            $tail = $is_utf8
                ? \mb_substr($str, $left_edge, (int) \mb_strlen($str))
                : self::substr($str, $left_edge, (int) self::strlen($str, $encoding), $encoding);

            if ($tail === false) {
                return '';
            }

            return $replacer_for_skipped_text . \ltrim($tail, $trim_chars);
        }

        // the window starts at the beginning of the text, only the right side can be cut
        $scan_from = \min($length - 1, $str_length);

        $cut_at = $is_utf8
            ? (int) \min(
                \mb_strpos($str, ' ', $scan_from),
                \mb_strpos($str, '.', $scan_from)
            )
            : (int) \min(
                self::strpos($str, ' ', $scan_from, $encoding),
                self::strpos($str, '.', $scan_from, $encoding)
            );

        if (!$cut_at) {
            return $str;
        }

        $head = $is_utf8
            ? \mb_substr($str, 0, $cut_at)
            : self::substr($str, 0, $cut_at, $encoding);

        if ($head === false) {
            return '';
        }

        return \rtrim($head, $trim_chars) . $replacer_for_skipped_text;
    }
1740
1741
    /**
     * Reads entire file into a string, optionally converting the content to UTF-8.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. The name is passed through
     *                                        Bootup::filter_sanitize_string_polyfill() before it is used.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded to the native function to trigger the
     *                                        include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is non-zero,
     *                                        a context with an "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read, negative values are handled as 0.
     *                                        The default (null) is to read until end of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout (only used if no
     *                                        $context was given).</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
        $filename = Bootup::filter_sanitize_string_polyfill($filename);
        if ($filename === false) {
            return false;
        }

        // build a default stream context only if the caller did not provide one
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        if ($max_length === null) {
            // no limit given: read until the end of the file
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        } else {
            // a negative limit is clamped to zero
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, \max(0, $max_length));
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // convert when the data is not reported as binary by "is_binary()",
        // or when "is_utf16()" / "is_utf32()" recognizes it
        $is_convertible = !self::is_binary($data, true)
                          ||
                          self::is_utf16($data, false) !== false
                          ||
                          self::is_utf32($data, false) !== false;

        if ($is_convertible) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1845
1846
    /**
     * Checks if the content of a file starts with a BOM (Byte Order Mark).
     *
     * The file is read completely and the content is handed to "string_has_bom()".
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $raw_content = \file_get_contents($file_path);

        if ($raw_content !== false) {
            return self::string_has_bom($raw_content);
        }

        // the file could not be read at all
        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1869
1870
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are processed recursively (element by element), strings
     * get their line endings normalized if they contain "\r" and - if they are
     * not pure ASCII - are brought into the requested normalization form. If the
     * normalizer cannot handle the input, the string is passed to
     * "self::encode('UTF-8', ...)" instead. Every other type is returned untouched.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var                <p>The value to filter.</p>
     * @param int                 $normalization_form <p>One of the "\Normalizer" form constants.</p>
     * @param string              $leading_combining  <p>Prefix that is added in front of a string which
     *                                                starts with a combining mark (\p{Mn}).</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            // filter every element in place
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            if (!ASCII::is_ascii($var)) {
                // '-' is only a truthy, non-empty marker for "was already normalized"
                $normalized = '-';

                if (!\Normalizer::isNormalized($var, $normalization_form)) {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // use the normalizer result if there is one, otherwise re-encode the input
                    $var = ($normalized && isset($normalized[0]))
                        ? $normalized
                        : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $has_leading_combining_char = $normalized
                                              &&
                                              $var[0] >= "\x80"
                                              &&
                                              isset($normalized[0], $leading_combining[0])
                                              &&
                                              \preg_match('/^\\p{Mn}/u', $var);

                if ($has_leading_combining_char) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1947
1948
    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it and
     * passes the result through "self::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW)); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      A value of null is not forwarded to the native function.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $with_options = $options !== null && \func_num_args() >= 4;

        // only hand over the options if the caller really provided some
        $raw_value = $with_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($raw_value);
    }
2003
2004
    /**
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables, optionally filters them and passes the result
     * through "self::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => 'FILTER_UNSAFE_RAW')); // array('bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int                           $type       <p>
     *                                                  One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                                  <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                                  <b>INPUT_ENV</b>.
     *                                                  </p>
     * @param array<string, mixed>|int|null $definition [optional] <p>
     *                                                  An array defining the arguments. A valid key is a string
     *                                                  containing a variable name and a valid value is either a
     *                                                  filter type, or an array optionally specifying the filter,
     *                                                  flags and options. If the value is an array, valid keys are
     *                                                  filter which specifies the filter type, flags which specifies
     *                                                  any flags that apply to the filter, and options which
     *                                                  specifies any options that apply to the filter.
     *                                                  </p>
     *                                                  <p>
     *                                                  This parameter can be also an integer holding a filter
     *                                                  constant. Then all values in the input array are filtered
     *                                                  by this filter.
     *                                                  </p>
     *                                                  <p>
     *                                                  If null, the native function is called with $type only
     *                                                  and $add_empty is not forwarded.
     *                                                  </p>
     * @param bool                          $add_empty  [optional] <p>
     *                                                  Add missing keys as <b>NULL</b> to the return value.
     *                                                  </p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>|false|null
     *                                         <p>
     *                                         An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *                                         An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *                                         set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *                                         is not set and <b>NULL</b> if the filter fails.
     *                                         </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition !== null && \func_num_args() >= 2) {
            // a definition was provided: forward everything to the native function
            $filtered = \filter_input_array($type, $definition, $add_empty);
        } else {
            $filtered = \filter_input_array($type);
        }

        /* @phpstan-ignore-next-line | magic frm self::filter :/ */
        return self::filter($filtered);
    }
2066
2067
    /**
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter and passes the result
     * through "self::filter()".
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null         $variable <p>
     *                                                Value to filter.
     *                                                </p>
     * @param int                           $filter   [optional] <p>
     *                                                The ID of the filter to apply. The
     *                                                manual page lists the available filters.
     *                                                </p>
     * @param array<string, mixed>|int|null $options  [optional] <p>
     *                                                Associative array of options or bitwise disjunction of flags. If filter
     *                                                accepts options, flags can be provided in "flags" field of array. For
     *                                                the "callback" filter, callable type should be passed. The
     *                                                callback must accept one argument, the value to be filtered, and return
     *                                                the value after filtering/sanitizing it.
     *                                                A value of null (explicit or by default) is not forwarded
     *                                                to the native function.
     *                                                </p>
     *                                                <p>
     *                                                <code>
     *                                                // for filters that accept options, use this format
     *                                                $options = array(
     *                                                'options' => array(
     *                                                'default' => 3, // value to return if the filter fails
     *                                                'min_range' => 0
     *                                                ),
     *                                                'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                                );
     *                                                $var = UTF8::filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                                // for filter that only accept flags, you can pass them directly
     *                                                $var = UTF8::filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                                // for filter that only accept flags, you can also pass as an array
     *                                                $var = UTF8::filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                                array('flags' => FILTER_NULL_ON_FAILURE));
     *                                                // callback validate filter ("foo" is a function that gets the value
     *                                                // and returns the filtered value, or false)
     *                                                $var = UTF8::filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                                </code>
     *                                                </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            // never hand an explicit null over to the native function: its
            // "options" parameter is "array|int" and passing null is deprecated
            // since PHP 8.1 (same guard as in "filter_input()")
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        /* @phpstan-ignore-next-line | magic frm self::filter :/ */
        return self::filter($variable);
    }
2147
2148
    /**
2149
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2150
     *
2151
     * Gets multiple variables and optionally filters them.
2152
     *
2153
     * EXAMPLE: <code>
2154
     * $filters = [
2155
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
2156
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
2157
     *     'email' => FILTER_VALIDATE_EMAIL,
2158
     * ];
2159
     *
2160
     * $data = [
2161
     *     'name' => 'κόσμε',
2162
     *     'age' => '18',
2163
     *     'email' => '[email protected]'
2164
     * ];
2165
     *
2166
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => '[email protected]']
2167
     * </code>
2168
     *
2169
     * @see http://php.net/manual/en/function.filter-var-array.php
2170
     *
2171
     * @param array<string, mixed>          $data       <p>
2172
     *                                                  An array with string keys containing the data to filter.
2173
     *                                                  </p>
2174
     * @param array<string, mixed>|int|null $definition [optional] <p>
2175
     *                                                  An array defining the arguments. A valid key is a string
2176
     *                                                  containing a variable name and a valid value is either a
2177
     *                                                  filter type, or an
2178
     *                                                  array optionally specifying the filter, flags and options.
2179
     *                                                  If the value is an array, valid keys are filter
2180
     *                                                  which specifies the filter type,
2181
     *                                                  flags which specifies any flags that apply to the
2182
     *                                                  filter, and options which specifies any options that
2183
     *                                                  apply to the filter. See the example below for a better understanding.
2184
     *                                                  </p>
2185
     *                                                  <p>
2186
     *                                                  This parameter can be also an integer holding a filter constant. Then all values
2187
     *                                                  in the input array are filtered by this filter.
2188
     *                                                  </p>
2189
     * @param bool                          $add_empty  [optional] <p>
2190
     *                                                  Add missing keys as <b>NULL</b> to the return value.
2191
     *                                                  </p>
2192
     *
2193
     * @psalm-pure
2194
     *
2195
     * @return array<string, mixed>|false|null
2196
     *                                         <p>
2197
     *                                         An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2198
     *                                         An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2199
     *                                         set.
2200
     *                                         </p>
2201
     */
2202 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // "null" means "no definition given". The native filter_var_array() documents
        // array|int for its second argument, so null must never be forwarded to it;
        // in that case the native default filter is used instead.
        // ($add_empty is only forwarded together with an explicit definition.)
        if ($definition === null) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        // run the native result through self::filter() before handing it back
        /* @phpstan-ignore-next-line | magic frm self::filter :/ */
        return self::filter($a);
    }
2219
2220
    /**
2221
     * Checks whether finfo is available on the server.
2222
     *
2223
     * @psalm-pure
2224
     *
2225
     * @return bool
2226
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
2227
     *
2228
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
2229
     */
2230
    public static function finfo_loaded(): bool
    {
        // "finfo" counts as available as soon as the class can be resolved
        $has_finfo_class = \class_exists('finfo');

        return $has_finfo_class;
    }
2234
2235
    /**
2236
     * Returns the first $n characters of the string.
2237
     *
2238
     * @param string $str      <p>The input string.</p>
2239
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
2240
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2241
     *
2242
     * @psalm-pure
2243
     *
2244
     * @return string
2245
     */
2246 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to extract from an empty string or for a non-positive count
        $nothing_to_extract = $str === '' || $n <= 0;
        if ($nothing_to_extract) {
            return '';
        }

        // UTF-8 goes straight to "mb_substr", every other encoding via self::substr()
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2261
2262
    /**
2263
     * Check if the number of Unicode characters isn't greater than the specified integer.
2264
     *
2265
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // true</code>
2266
     *
2267
     * @param string $str      the original string to be checked
2268
     * @param int    $box_size the size in number of chars to be checked against string
2269
     *
2270
     * @psalm-pure
2271
     *
2272
     * @return bool
2273
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
2274
     */
2275 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // character count as reported by self::strlen(), forced to int
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2279
2280
    /**
2281
     * Try to fix simple broken UTF-8 strings.
2282
     *
2283
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
2284
     *
2285
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
2286
     *
2287
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
2288
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
2289
     * See: http://en.wikipedia.org/wiki/Windows-1252
2290
     *
2291
     * @param string $str <p>The input string</p>
2292
     *
2293
     * @psalm-pure
2294
     *
2295
     * @return string
2296
     */
2297 46
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * Search strings (the keys of the "utf8_fix" table), built once per request.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;

        /**
         * Replacement strings (the "utf8_fix" table itself), built once per request.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

        if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {
            // lazy-load the mapping table on first use
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = self::$BROKEN_UTF8_FIX;
            $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys(self::$BROKEN_UTF8_FIX ?: []);
        }

        \assert(\is_array($BROKEN_UTF8_TO_UTF8_VALUES_CACHE));

        $fixed = \str_replace(
            $BROKEN_UTF8_TO_UTF8_KEYS_CACHE,
            $BROKEN_UTF8_TO_UTF8_VALUES_CACHE,
            $str
        );

        return $fixed;
    }
2330
2331
    /**
2332
     * Fix a double (or multiple) encoded UTF8 string.
2333
     *
2334
     * EXAMPLE: <code>UTF8::fix_utf8('FÃƒÂ©dÃƒÂ©ration'); // 'Fédération'</code>
2335
     *
2336
     * @param string|string[] $str you can use a string or an array of strings
2337
     *
2338
     * @psalm-pure
2339
     *
2340
     * @return string|string[]
2341
     *                         <p>Will return the fixed input-"array" or
2342
     *                         the fixed input-"string".</p>
2343
     *
2344
     * @template TFixUtf8
2345
     * @phpstan-param TFixUtf8 $str
2346
     * @phpstan-return TFixUtf8
2347
     */
2348 2
    public static function fix_utf8($str)
    {
        // arrays are fixed element by element (recursively), keys stay untouched
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // decode + re-encode until one more round no longer changes the string
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($previous, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2379
2380
    /**
2381
     * Get the text direction ('RTL' or 'LTR') of a specific character.
2382
     *
2383
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
2384
     *
2385
     * @param string $char
2386
     *
2387
     * @psalm-pure
2388
     *
2389
     * @return string
2390
     *                <p>'RTL' or 'LTR'.</p>
2391
     */
2392 2
    public static function getCharDirection(string $char): string
    {
        // Preferred path: ask "IntlChar" and map its direction value to 'LTR' / 'RTL'.
        if (self::$SUPPORT['intlChar'] === true) {
            $intl_direction = \IntlChar::charDirection($char);

            // direction values from the "IntlChar"-Class
            $ltr_values = [0, 11, 12, 20];
            $rtl_values = [1, 13, 14, 15, 21];

            if (\in_array($intl_direction, $ltr_values, true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, $rtl_values, true)) {
                return 'RTL';
            }

            // any other value falls through to the table lookup below
        }

        $c = static::chr_to_decimal($char);

        // everything outside of 0x5be..0x10b7f is treated as left-to-right
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        if ($c <= 0x85e) {
            // lower block: 0x5be..0x85e
            $rtl_code_points = [
                0x5be, 0x5c0, 0x5c3, 0x5c6,
                0x608, 0x60b, 0x60d, 0x61b,
                0x710, 0x7b1, 0x7fa,
                0x81a, 0x824, 0x828, 0x85e,
            ];
            $rtl_ranges = [
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                [0x800, 0x815],
                [0x830, 0x83e],
                [0x840, 0x858],
            ];
        } elseif ($c === 0x200f) {
            return 'RTL';
        } elseif ($c >= 0xfb1d) {
            // upper block: 0xfb1d..0x10b7f (upper bound already enforced above)
            $rtl_code_points = [
                0xfb1d, 0xfb3e,
                0x10808, 0x1083c, 0x1093f, 0x10a00,
            ];
            $rtl_ranges = [
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ];
        } else {
            // between the two blocks (and not 0x200f)
            return 'LTR';
        }

        // single code points are matched strictly, ranges inclusively
        if (\in_array($c, $rtl_code_points, true)) {
            return 'RTL';
        }

        foreach ($rtl_ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2497
2498
    /**
2499
     * Check for php-support.
2500
     *
2501
     * @param string|null $key
2502
     *
2503
     * @psalm-pure
2504
     *
2505
     * @return mixed
2506
     *               Return the full support-"array", if $key === null<br>
2507
     *               return bool-value, if $key is used and available<br>
2508
     *               otherwise return <strong>null</strong>
2509
     */
2510 27
    public static function getSupportInfo(string $key = null)
    {
        // without a key the whole support-"array" is returned as it currently is
        if ($key === null) {
            return self::$SUPPORT;
        }

        // load the transliterator list only once
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // compatibility fix for old versions
        self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

        if (isset(self::$SUPPORT[$key])) {
            return self::$SUPPORT[$key];
        }

        return null;
    }
2524
2525
    /**
2526
     * Warning: this method only works for some file-types (png, jpg)
2527
     *          if you need more supported types, please use e.g. "finfo"
2528
     *
2529
     * @param string                                                        $str
2530
     * @param array{ext: null|string, mime: null|string, type: null|string} $fallback
2531
     *
2532
     * @return array{ext: null|string, mime: null|string, type: null|string}
2533
     *
2534
     * @psalm-pure
2535
     */
2536 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        // only the first two bytes are inspected
        /** @var false|string $leading_bytes - needed for PhpStan (stubs error) */
        $leading_bytes = \substr($str, 0, 2);
        if ($leading_bytes === false || \strlen($leading_bytes) !== 2) {
            return $fallback;
        }

        $byte_values = \unpack('C2chars', $leading_bytes);
        if ($byte_values === false) {
            return $fallback;
        }

        // decimal values of both bytes glued together, e.g. 0x89 0x50 => "137" . "80" => 13780
        $type_code = (int) ($byte_values['chars1'] . $byte_values['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_types[$type_code] ?? $fallback;
    }
2598
2599
    /**
2600
     * @param int    $length         <p>Length of the random string.</p>
2601
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2602
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2603
     *
2604
     * @return string
2605
     */
2606 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // "UTF-8" uses the plain "mb_*" functions, everything else goes through
        // the encoding-aware helpers of this class (after normalizing the encoding)
        $use_mb_functions = $encoding === 'UTF-8';

        if ($use_mb_functions) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        // no characters to pick from
        if ($pool_size === 0) {
            return '';
        }

        $last_index = $pool_size - 1;
        $picked = 0;
        $result = '';

        //
        // add random chars
        //

        while ($picked < $length) {
            try {
                $index = \random_int(0, $last_index);
            } catch (\Exception $e) {
                $index = \mt_rand(0, $last_index);
            }

            $char = $use_mb_functions
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // a failed extraction does not count, try again
            if ($char === false) {
                continue;
            }

            $result .= $char;
            ++$picked;
        }

        return $result;
    }
2661
2662
    /**
2663
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
2664
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2665
     *
2666
     * @return string
2667
     */
2668 1
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        $upper_bound = \mt_getrandmax();

        try {
            $random_part = \random_int(0, $upper_bound);
        } catch (\Exception $e) {
            $random_part = \mt_rand(0, $upper_bound);
        }

        // random number + session id + request addresses + caller supplied entropy
        $unique_helper = \implode(
            '',
            [
                $random_part,
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $extra_entropy,
            ]
        );

        $unique_string = \uniqid($unique_helper, true);

        // without md5 the raw "uniqid" result (prefixed with the helper) is returned
        if (!$use_md5) {
            return $unique_string;
        }

        return \md5($unique_string . $unique_helper);
    }
2690
2691
    /**
2692
     * Returns true if the string contains a lower case char, false otherwise.
2693
     *
2694
     * @param string $str <p>The input string.</p>
2695
     *
2696
     * @psalm-pure
2697
     *
2698
     * @return bool
2699
     *              <p>Whether or not the string contains a lower case character.</p>
2700
     */
2701 47
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without "mbstring" the check is delegated to self::str_matches_pattern()
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2709
2710
    /**
2711
     * Returns true if the string contains whitespace, false otherwise.
2712
     *
2713
     * @param string $str <p>The input string.</p>
2714
     *
2715
     * @psalm-pure
2716
     *
2717
     * @return bool
2718
     *              <p>Whether or not the string contains whitespace.</p>
2719
     */
2720 11
    public static function has_whitespace(string $str): bool
    {
        $pattern = '.*[[:space:]]';

        // without "mbstring" the check is delegated to self::str_matches_pattern()
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2728
2729
    /**
2730
     * Returns true if the string contains an upper case char, false otherwise.
2731
     *
2732
     * @param string $str <p>The input string.</p>
2733
     *
2734
     * @psalm-pure
2735
     *
2736
     * @return bool
2737
     *              <p>Whether or not the string contains an upper case character.</p>
2738
     */
2739 12
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without "mbstring" the check is delegated to self::str_matches_pattern()
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2747
2748
    /**
2749
     * Converts a hexadecimal value into a UTF-8 character.
2750
     *
2751
     * INFO: opposite to UTF8::chr_to_hex()
2752
     *
2753
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
2754
     *
2755
     * @param string $hexdec <p>The hexadecimal value.</p>
2756
     *
2757
     * @psalm-pure
2758
     *
2759
     * @return false|string one single UTF-8 character
2760
     */
2761 4
    public static function hex_to_chr(string $hexdec)
    {
        // "hexdec" may complain about non-hexadecimal characters, that is silenced on purpose
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2766
2767
    /**
2768
     * Converts hexadecimal U+xxxx code point representation to integer.
2769
     *
2770
     * INFO: opposite to UTF8::int_to_hex()
2771
     *
2772
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
2773
     *
2774
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
2775
     *
2776
     * @psalm-pure
2777
     *
2778
     * @return false|int
2779
     *                   <p>The code point, or false on failure.</p>
2780
     */
2781 2
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Optional "\u" or "U+" prefix, followed by 4 to 6 *hexadecimal* digits.
        // Letters outside of a-f must not match: "intval(..., 16)" stops at the first
        // invalid digit and would silently return a wrong code point (e.g. 0) instead
        // of the documented "false on failure".
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2796
2797
    /**
2798
     * Converts a UTF-8 string to a series of HTML numbered entities.
2799
     *
2800
     * INFO: opposite to UTF8::html_decode()
2801
     *
2802
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
2803
     *
2804
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
2805
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
2806
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
2807
     *
2808
     * @psalm-pure
2809
     *
2810
     * @return string HTML numbered entities
2811
     */
2812 14
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // keep ASCII as-is by starting the conversion range at 0x80
            $start_code = $keep_ascii_chars ? 0x80 : 0x00;

            // convmap: [first code point, last code point, offset, mask]
            // - the range ends at 0x10ffff, so the last Unicode plane is encoded too
            // - the mask has to be wide enough to keep every bit of 0x10ffff
            // The encoding is always passed explicitly, so the result does not depend
            // on the current "mb_internal_encoding" setting.
            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity(
                $str,
                [$start_code, 0x10ffff, 0, 0x1fffff],
                $encoding
            );
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2869
2870
    /**
2871
     * UTF-8 version of html_entity_decode()
2872
     *
2873
     * The reason we are not using html_entity_decode() by itself is because
2874
     * while it is not technically correct to leave out the semicolon
2875
     * at the end of an entity most browsers will still interpret the entity
2876
     * correctly. html_entity_decode() does not convert entities without
2877
     * semicolons, so we are left with our own little solution here. Bummer.
2878
     *
2879
     * Convert all HTML entities to their applicable characters.
2880
     *
2881
     * INFO: opposite to UTF8::html_encode()
2882
     *
2883
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2884
     *
2885
     * @see http://php.net/manual/en/function.html-entity-decode.php
2886
     *
2887
     * @param string   $str      <p>
2888
     *                           The input string.
2889
     *                           </p>
2890
     * @param int|null $flags    [optional] <p>
2891
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2892
     *                           and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2893
     *                           <table>
2894
     *                           Available <i>flags</i> constants
2895
     *                           <tr valign="top">
2896
     *                           <td>Constant Name</td>
2897
     *                           <td>Description</td>
2898
     *                           </tr>
2899
     *                           <tr valign="top">
2900
     *                           <td><b>ENT_COMPAT</b></td>
2901
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2902
     *                           </tr>
2903
     *                           <tr valign="top">
2904
     *                           <td><b>ENT_QUOTES</b></td>
2905
     *                           <td>Will convert both double and single quotes.</td>
2906
     *                           </tr>
2907
     *                           <tr valign="top">
2908
     *                           <td><b>ENT_NOQUOTES</b></td>
2909
     *                           <td>Will leave both double and single quotes unconverted.</td>
2910
     *                           </tr>
2911
     *                           <tr valign="top">
2912
     *                           <td><b>ENT_HTML401</b></td>
2913
     *                           <td>
2914
     *                           Handle code as HTML 4.01.
2915
     *                           </td>
2916
     *                           </tr>
2917
     *                           <tr valign="top">
2918
     *                           <td><b>ENT_XML1</b></td>
2919
     *                           <td>
2920
     *                           Handle code as XML 1.
2921
     *                           </td>
2922
     *                           </tr>
2923
     *                           <tr valign="top">
2924
     *                           <td><b>ENT_XHTML</b></td>
2925
     *                           <td>
2926
     *                           Handle code as XHTML.
2927
     *                           </td>
2928
     *                           </tr>
2929
     *                           <tr valign="top">
2930
     *                           <td><b>ENT_HTML5</b></td>
2931
     *                           <td>
2932
     *                           Handle code as HTML 5.
2933
     *                           </td>
2934
     *                           </tr>
2935
     *                           </table>
2936
     *                           </p>
2937
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2938
     *
2939
     * @psalm-pure
2940
     *
2941
     * @return string the decoded string
2942
     */
2943 34
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // shorter than 4 bytes (examples: &; || &x;) => nothing to decode
        if (!isset($str[3])) {
            return $str;
        }

        // no "&" => no entity
        if (\strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        $is_whitelisted_encoding = \in_array(
            $encoding,
            ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'],
            true
        );
        if (!$is_whitelisted_encoding && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // decode again and again until a pass leaves the string unchanged
        while (true) {
            $before = $str;

            if (\strpos($str, '&') === false) {
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities:
                // append the missing ";" so that "html_entity_decode" picks them up
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);

            if ($before === $str) {
                break;
            }
        }

        return $str;
    }
3002
3003
    /**
3004
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
3005
     *
3006
     * @param string $str
3007
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3008
     *
3009
     * @psalm-pure
3010
     *
3011
     * @return string
3012
     */
3013 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Escape both quote styles and substitute invalid code unit sequences
        // instead of getting an empty string back.
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3021
3022
    /**
3023
     * Remove empty html-tag.
3024
     *
3025
     * e.g.: <pre><tag></tag></pre>
3026
     *
3027
     * @param string $str
3028
     *
3029
     * @psalm-pure
3030
     *
3031
     * @return string
3032
     */
3033 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // Matches an opening tag directly followed (optional whitespace in between)
        // by a closing tag, e.g. "<tag></tag>" or "<b> </b>".
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // preg_replace() returns null on a PCRE error (e.g. the subject is not
        // valid UTF-8 while the "u" modifier is set). Casting that to string would
        // silently wipe the whole input, so hand the input back unchanged instead.
        return $stripped ?? $str;
    }
3041
3042
    /**
3043
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3044
     *
3045
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3046
     *
3047
     * @see http://php.net/manual/en/function.htmlentities.php
3048
     *
3049
     * @param string $str           <p>
3050
     *                              The input string.
3051
     *                              </p>
3052
     * @param int    $flags         [optional] <p>
3053
     *                              A bitmask of one or more of the following flags, which specify how to handle
3054
     *                              quotes, invalid code unit sequences and the used document type. The default is
3055
     *                              ENT_COMPAT | ENT_HTML401.
3056
     *                              <table>
3057
     *                              Available <i>flags</i> constants
3058
     *                              <tr valign="top">
3059
     *                              <td>Constant Name</td>
3060
     *                              <td>Description</td>
3061
     *                              </tr>
3062
     *                              <tr valign="top">
3063
     *                              <td><b>ENT_COMPAT</b></td>
3064
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3065
     *                              </tr>
3066
     *                              <tr valign="top">
3067
     *                              <td><b>ENT_QUOTES</b></td>
3068
     *                              <td>Will convert both double and single quotes.</td>
3069
     *                              </tr>
3070
     *                              <tr valign="top">
3071
     *                              <td><b>ENT_NOQUOTES</b></td>
3072
     *                              <td>Will leave both double and single quotes unconverted.</td>
3073
     *                              </tr>
3074
     *                              <tr valign="top">
3075
     *                              <td><b>ENT_IGNORE</b></td>
3076
     *                              <td>
3077
     *                              Silently discard invalid code unit sequences instead of returning
3078
     *                              an empty string. Using this flag is discouraged as it
3079
     *                              may have security implications.
3080
     *                              </td>
3081
     *                              </tr>
3082
     *                              <tr valign="top">
3083
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3084
     *                              <td>
3085
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3086
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3087
     *                              string.
3088
     *                              </td>
3089
     *                              </tr>
3090
     *                              <tr valign="top">
3091
     *                              <td><b>ENT_DISALLOWED</b></td>
3092
     *                              <td>
3093
     *                              Replace invalid code points for the given document type with a
3094
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3095
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3096
     *                              instance, to ensure the well-formedness of XML documents with
3097
     *                              embedded external content.
3098
     *                              </td>
3099
     *                              </tr>
3100
     *                              <tr valign="top">
3101
     *                              <td><b>ENT_HTML401</b></td>
3102
     *                              <td>
3103
     *                              Handle code as HTML 4.01.
3104
     *                              </td>
3105
     *                              </tr>
3106
     *                              <tr valign="top">
3107
     *                              <td><b>ENT_XML1</b></td>
3108
     *                              <td>
3109
     *                              Handle code as XML 1.
3110
     *                              </td>
3111
     *                              </tr>
3112
     *                              <tr valign="top">
3113
     *                              <td><b>ENT_XHTML</b></td>
3114
     *                              <td>
3115
     *                              Handle code as XHTML.
3116
     *                              </td>
3117
     *                              </tr>
3118
     *                              <tr valign="top">
3119
     *                              <td><b>ENT_HTML5</b></td>
3120
     *                              <td>
3121
     *                              Handle code as HTML 5.
3122
     *                              </td>
3123
     *                              </tr>
3124
     *                              </table>
3125
     *                              </p>
3126
     * @param string $encoding      [optional] <p>
3127
     *                              Like <b>htmlspecialchars</b>,
3128
     *                              <b>htmlentities</b> takes an optional third argument
3129
     *                              <i>encoding</i> which defines encoding used in
3130
     *                              conversion.
3131
     *                              Although this argument is technically optional, you are highly
3132
     *                              encouraged to specify the correct value for your code.
3133
     *                              </p>
3134
     * @param bool   $double_encode [optional] <p>
3135
     *                              When <i>double_encode</i> is turned off PHP will not
3136
     *                              encode existing html entities. The default is to convert everything.
3137
     *                              </p>
3138
     *
3139
     * @psalm-pure
3140
     *
3141
     * @return string
3142
     *                <p>
3143
     *                The encoded string.
3144
     *                <br><br>
3145
     *                If the input <i>string</i> contains an invalid code unit
3146
     *                sequence within the given <i>encoding</i> an empty string
3147
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3148
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3149
     *                </p>
3150
     */
3151 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // The two common charset names are used verbatim, everything else is normalized first.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /*
         * The native htmlentities() leaves backslashes untouched, so every
         * backslash is replaced by its numeric entity afterwards.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        // Final pass through the class's own html_encode() helper (second argument: true).
        return self::html_encode($encoded, true, $encoding);
    }
3180
3181
    /**
3182
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3183
     *
3184
     * INFO: Take a look at "UTF8::htmlentities()"
3185
     *
3186
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3187
     *
3188
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3189
     *
3190
     * @param string $str           <p>
3191
     *                              The string being converted.
3192
     *                              </p>
3193
     * @param int    $flags         [optional] <p>
3194
     *                              A bitmask of one or more of the following flags, which specify how to handle
3195
     *                              quotes, invalid code unit sequences and the used document type. The default is
3196
     *                              ENT_COMPAT | ENT_HTML401.
3197
     *                              <table>
3198
     *                              Available <i>flags</i> constants
3199
     *                              <tr valign="top">
3200
     *                              <td>Constant Name</td>
3201
     *                              <td>Description</td>
3202
     *                              </tr>
3203
     *                              <tr valign="top">
3204
     *                              <td><b>ENT_COMPAT</b></td>
3205
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3206
     *                              </tr>
3207
     *                              <tr valign="top">
3208
     *                              <td><b>ENT_QUOTES</b></td>
3209
     *                              <td>Will convert both double and single quotes.</td>
3210
     *                              </tr>
3211
     *                              <tr valign="top">
3212
     *                              <td><b>ENT_NOQUOTES</b></td>
3213
     *                              <td>Will leave both double and single quotes unconverted.</td>
3214
     *                              </tr>
3215
     *                              <tr valign="top">
3216
     *                              <td><b>ENT_IGNORE</b></td>
3217
     *                              <td>
3218
     *                              Silently discard invalid code unit sequences instead of returning
3219
     *                              an empty string. Using this flag is discouraged as it
3220
     *                              may have security implications.
3221
     *                              </td>
3222
     *                              </tr>
3223
     *                              <tr valign="top">
3224
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3225
     *                              <td>
3226
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3227
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3228
     *                              string.
3229
     *                              </td>
3230
     *                              </tr>
3231
     *                              <tr valign="top">
3232
     *                              <td><b>ENT_DISALLOWED</b></td>
3233
     *                              <td>
3234
     *                              Replace invalid code points for the given document type with a
3235
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3236
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3237
     *                              instance, to ensure the well-formedness of XML documents with
3238
     *                              embedded external content.
3239
     *                              </td>
3240
     *                              </tr>
3241
     *                              <tr valign="top">
3242
     *                              <td><b>ENT_HTML401</b></td>
3243
     *                              <td>
3244
     *                              Handle code as HTML 4.01.
3245
     *                              </td>
3246
     *                              </tr>
3247
     *                              <tr valign="top">
3248
     *                              <td><b>ENT_XML1</b></td>
3249
     *                              <td>
3250
     *                              Handle code as XML 1.
3251
     *                              </td>
3252
     *                              </tr>
3253
     *                              <tr valign="top">
3254
     *                              <td><b>ENT_XHTML</b></td>
3255
     *                              <td>
3256
     *                              Handle code as XHTML.
3257
     *                              </td>
3258
     *                              </tr>
3259
     *                              <tr valign="top">
3260
     *                              <td><b>ENT_HTML5</b></td>
3261
     *                              <td>
3262
     *                              Handle code as HTML 5.
3263
     *                              </td>
3264
     *                              </tr>
3265
     *                              </table>
3266
     *                              </p>
3267
     * @param string $encoding      [optional] <p>
3268
     *                              Defines encoding used in conversion.
3269
     *                              </p>
3270
     *                              <p>
3271
     *                              For the purposes of this function, the encodings
3272
     *                              ISO-8859-1, ISO-8859-15,
3273
     *                              UTF-8, cp866,
3274
     *                              cp1251, cp1252, and
3275
     *                              KOI8-R are effectively equivalent, provided the
3276
     *                              <i>string</i> itself is valid for the encoding, as
3277
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3278
     *                              the same positions in all of these encodings.
3279
     *                              </p>
3280
     * @param bool   $double_encode [optional] <p>
3281
     *                              When <i>double_encode</i> is turned off PHP will not
3282
     *                              encode existing html entities, the default is to convert everything.
3283
     *                              </p>
3284
     *
3285
     * @psalm-pure
3286
     *
3287
     * @return string the converted string.
3288
     *                </p>
3289
     *                <p>
3290
     *                If the input <i>string</i> contains an invalid code unit
3291
     *                sequence within the given <i>encoding</i> an empty string
3292
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3293
     *                <b>ENT_SUBSTITUTE</b> flags are set
3294
     */
3295 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // The two common charset names are used verbatim, everything else is normalized first.
        $is_common_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_common_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Delegate the actual escaping to the native implementation.
        $escaped = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $escaped;
    }
3312
3313
    /**
3314
     * Checks whether iconv is available on the server.
3315
     *
3316
     * @psalm-pure
3317
     *
3318
     * @return bool
3319
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3320
     *
3321
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3322
     */
3323
    public static function iconv_loaded(): bool
    {
        // Ask the engine whether the "iconv" extension is loaded.
        $has_iconv = \extension_loaded('iconv');

        return $has_iconv;
    }
3327
3328
    /**
3329
     * Converts Integer to hexadecimal U+xxxx code point representation.
3330
     *
3331
     * INFO: opposite to UTF8::hex_to_int()
3332
     *
3333
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3334
     *
3335
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3336
     * @param string $prefix [optional]
3337
     *
3338
     * @psalm-pure
3339
     *
3340
     * @return string the prefixed hexadecimal code point, zero-padded to at least four digits
3341
     */
3342 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        $hex_digits = \dechex($int);

        // Left-pad with zeros so that at least four hex digits are emitted (241 => "00f1").
        if (\strlen($hex_digits) < 4) {
            $hex_digits = \substr('0000' . $hex_digits, -4);
        }

        return $prefix . $hex_digits;
    }
3350
3351
    /**
3352
     * Checks whether intl-char is available on the server.
3353
     *
3354
     * @psalm-pure
3355
     *
3356
     * @return bool
3357
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3358
     *
3359
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3360
     */
3361
    public static function intlChar_loaded(): bool
    {
        // The check is done via the presence of the "IntlChar" class.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
3365
3366
    /**
3367
     * Checks whether intl is available on the server.
3368
     *
3369
     * @psalm-pure
3370
     *
3371
     * @return bool
3372
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3373
     *
3374
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3375
     */
3376 5
    public static function intl_loaded(): bool
    {
        // Ask the engine whether the "intl" extension is loaded.
        $has_intl = \extension_loaded('intl');

        return $has_intl;
    }
3380
3381
    /**
3382
     * Returns true if the string contains only alphabetic chars, false otherwise.
3383
     *
3384
     * @param string $str <p>The input string.</p>
3385
     *
3386
     * @psalm-pure
3387
     *
3388
     * @return bool
3389
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3390
     */
3391 10
    public static function is_alpha(string $str): bool
    {
        // Zero or more alphabetic chars from start to end (so an empty string matches as well).
        $pattern = '^[[:alpha:]]*$';

        // Use mbstring's regex matcher when it is available, otherwise the class's own pattern helper.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3399
3400
    /**
3401
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3402
     *
3403
     * @param string $str <p>The input string.</p>
3404
     *
3405
     * @psalm-pure
3406
     *
3407
     * @return bool
3408
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3409
     */
3410 13
    public static function is_alphanumeric(string $str): bool
    {
        // Zero or more alphanumeric chars from start to end (so an empty string matches as well).
        $pattern = '^[[:alnum:]]*$';

        // Use mbstring's regex matcher when it is available, otherwise the class's own pattern helper.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3418
3419
    /**
3420
     * Returns true if the string contains only punctuation chars, false otherwise.
3421
     *
3422
     * @param string $str <p>The input string.</p>
3423
     *
3424
     * @psalm-pure
3425
     *
3426
     * @return bool
3427
     *              <p>Whether or not $str contains only punctuation chars.</p>
3428
     */
3429 10
    public static function is_punctuation(string $str): bool
    {
        // Zero or more punctuation chars from start to end (so an empty string matches as well).
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3433
3434
    /**
3435
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3436
     *
3437
     * @param string $str                       <p>The input string.</p>
3438
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
3439
     *
3440
     * @psalm-pure
3441
     *
3442
     * @return bool
3443
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3444
     */
3445 1
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // Run the string through the invisible-character remover; if nothing
        // was removed, the input is considered printable.
        $cleaned = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $cleaned === $str;
    }
3449
3450
    /**
3451
     * Checks if a string is 7 bit ASCII.
3452
     *
3453
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3454
     *
3455
     * @param string $str <p>The string to check.</p>
3456
     *
3457
     * @psalm-pure
3458
     *
3459
     * @return bool
3460
     *              <p>
3461
     *              <strong>true</strong> if it is ASCII<br>
3462
     *              <strong>false</strong> otherwise
3463
     *              </p>
3464
     */
3465 8
    public static function is_ascii(string $str): bool
    {
        // Thin wrapper: the check itself lives in the ASCII helper class.
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3469
3470
    /**
3471
     * Returns true if the string is base64 encoded, false otherwise.
3472
     *
3473
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3474
     *
3475
     * @param string|null $str                   <p>The input string.</p>
3476
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3477
     *
3478
     * @psalm-pure
3479
     *
3480
     * @return bool
3481
     *              <p>Whether or not $str is base64 encoded.</p>
3482
     */
3483 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // Anything that is not a string (e.g. null) can never be base64.
        if (!\is_string($str)) {
            return false;
        }

        // The empty string only counts when the caller explicitly allows it.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // Strict decoding rejects characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round trip: only input that re-encodes to exactly itself is accepted.
        return \base64_encode($decoded) === $str;
    }
3501
3502
    /**
3503
     * Check if the input is binary... (this looks like a hack).
3504
     *
3505
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3506
     *
3507
     * @param int|string $input
3508
     * @param bool       $strict
3509
     *
3510
     * @psalm-pure
3511
     *
3512
     * @return bool
3513
     */
3514 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        // A string made up solely of "0" and "1" is treated as binary notation.
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        // Let the file-type detection of this class decide next.
        $file_type = self::get_file_type($data);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        if (!$strict) {
            // Non-strict heuristic: more than 25% NUL bytes counts as binary.
            $byte_count = \strlen($data);
            $null_count = \substr_count($data, "\x0", 0, $byte_count);

            return ($null_count / $byte_count) > 0.25;
        }

        // Strict mode relies on ext-fileinfo.
        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $mime_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        return $mime_encoding === 'binary';
    }
3554
3555
    /**
3556
     * Check if the file is binary.
3557
     *
3558
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3559
     *
3560
     * @param string $file
3561
     *
3562
     * @return bool
3563
     */
3564 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // The file could not be opened: nothing to inspect.
            return false;
        }

        // Only the first 512 bytes are sampled.
        $sample = \fread($handle, 512);
        \fclose($handle);

        // Empty file or failed read.
        if ($sample === false || $sample === '') {
            return false;
        }

        // Delegate to the strict variant of the binary check.
        return self::is_binary($sample, true);
    }
3581
3582
    /**
3583
     * Returns true if the string contains only whitespace chars, false otherwise.
3584
     *
3585
     * @param string $str <p>The input string.</p>
3586
     *
3587
     * @psalm-pure
3588
     *
3589
     * @return bool
3590
     *              <p>Whether or not $str contains only whitespace characters.</p>
3591
     */
3592 15
    public static function is_blank(string $str): bool
    {
        // Zero or more whitespace chars from start to end (so an empty string matches as well).
        $pattern = '^[[:space:]]*$';

        // Use mbstring's regex matcher when it is available, otherwise the class's own pattern helper.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3600
3601
    /**
3602
     * Checks if the given string is equal to any "Byte Order Mark".
3603
     *
3604
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3605
     *
3606
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3607
     *
3608
     * @param string $str <p>The input string.</p>
3609
     *
3610
     * @psalm-pure
3611
     *
3612
     * @return bool
3613
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3614
     */
3615 2
    public static function is_bom($str): bool
    {
        // Only the keys of self::$BOM (the BOM byte sequences) are compared; the
        // values are not needed here. Iterate by value: a by-reference loop over
        // the shared static array is unnecessary for a read-only lookup.
        /** @noinspection PhpUnusedLocalVariableInspection */
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // strict comparison: the input has to be exactly the BOM, nothing more
            if ($str === $bom_string) {
                return true;
            }
        }

        return false;
    }
3626
3627
    /**
3628
     * Determine whether the string is considered to be empty.
3629
     *
3630
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3631
     * empty() does not generate a warning if the variable does not exist.
3632
     *
3633
     * @param array|float|int|string $str
3634
     *
3635
     * @psalm-pure
3636
     *
3637
     * @return bool
3638
     *              <p>Whether or not $str is empty().</p>
3639
     */
3640 1
    public static function is_empty($str): bool
    {
        // For an always-defined parameter, empty() is the same as a negated boolean cast.
        return !$str;
    }
3644
3645
    /**
3646
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3647
     *
3648
     * @param string $str <p>The input string.</p>
3649
     *
3650
     * @psalm-pure
3651
     *
3652
     * @return bool
3653
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3654
     */
3655 13
    public static function is_hexadecimal(string $str): bool
    {
        // Zero or more hexadecimal digits from start to end (so an empty string matches as well).
        $pattern = '^[[:xdigit:]]*$';

        // Use mbstring's regex matcher when it is available, otherwise the class's own pattern helper.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3663
3664
    /**
3665
     * Check if the string contains any HTML tags.
3666
     *
3667
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3668
     *
3669
     * @param string $str <p>The input string.</p>
3670
     *
3671
     * @psalm-pure
3672
     *
3673
     * @return bool
3674
     *              <p>Whether or not $str contains html elements.</p>
3675
     */
3676 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        // Look for one opening, closing or self-closing tag, optionally carrying attributes.
        $found = \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded);

        return $found === 1;
    }
3691
3692
    /**
3693
     * Check if $url is a valid URL.
3694
     *
3695
     * @param string $url
3696
     * @param bool   $disallow_localhost
3697
     *
3698
     * @psalm-pure
3699
     *
3700
     * @return bool
3701
     */
3702 1
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // IPv6 literals are written in brackets inside a URL
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            // any URL that contains ".localhost" after the scheme
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            if (\preg_match($regex, $url)) {
                return false;
            }

            // The prefix checks above are bypassed by URLs with userinfo, e.g.
            // "http://user@localhost/", so the parsed host is checked as well.
            $host = \parse_url($url, \PHP_URL_HOST);
            if (\is_string($host)) {
                $host = \strtolower(\trim($host, '[]'));
                if ($host === 'localhost' || $host === '127.0.0.1' || $host === '::1') {
                    return false;
                }
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        if (\preg_match($regex, $url)) {
            return true;
        }

        // everything else is left to the native URL validation
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3743
3744
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
     *                                                       results.</p>
     *
     * @throws \RuntimeException if the json extension is not available
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a null result is only acceptable when the input itself is the literal "null" (in any letter case)
        if ($decoded === null) {
            if (\strtoupper($str) !== 'NULL') {
                return false;
            }
        }

        if ($only_array_or_object_results_are_valid) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        // finally make sure that the decoder did not report an error
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3783
3784
    /**
     * Returns true if the string contains only lowercase chars, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // without mbstring we fall back to the internal pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3800
3801
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check is done via \unserialize() with object instantiation disabled,
     * so no class from the input is loaded and none of its magic methods run.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized boolean false, which cannot be told apart
        // from the failure return value of \unserialize()
        if ($str === 'b:0;') {
            return true;
        }

        // "allowed_classes => false" turns serialized objects into __PHP_Incomplete_Class
        // instead of instantiating them: the result of this check stays the same, but
        // untrusted input can no longer trigger __wakeup() / __destruct() of real classes
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3823
3824
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // without mbstring we fall back to the internal pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3843
3844
    /**
     * Check if the string is UTF-16.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>Return false early if the string is not detected
     *                                          as binary (skipped when the string has a BOM).</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // round-trip the input through UTF-16LE: only count chars when the conversion is stable
        $maybe_utf16le = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
            if ($test3 === $test) {
                /**
                 * @psalm-suppress RedundantCondition
                 */
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }
                // only the keys are needed, so iterate over them by value instead of
                // taking a reference into the temporary array returned by count_chars()
                // NOTE(review): in_array() searches the values of $str_chars, not its keys - confirm this is intended
                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf16le;
                    }
                }
            }
        }

        // same round-trip check for UTF-16BE
        $maybe_utf16be = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
            if ($test3 === $test) {
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }
                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf16be;
                    }
                }
            }
        }

        // the byte order with more hits wins, a tie means "not UTF-16"
        if ($maybe_utf16be !== $maybe_utf16le) {
            if ($maybe_utf16le > $maybe_utf16be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
3942
3943
    /**
     * Check if the string is UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>Return false early if the string is not detected
     *                                          as binary (skipped when the string has a BOM).</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // round-trip the input through UTF-32LE: only count chars when the conversion is stable
        $maybe_utf32le = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32LE');
            if ($test3 === $test) {
                /**
                 * @psalm-suppress RedundantCondition
                 */
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }
                // only the keys are needed, so iterate over them by value instead of
                // taking a reference into the temporary array returned by count_chars()
                // NOTE(review): in_array() searches the values of $str_chars, not its keys - confirm this is intended
                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf32le;
                    }
                }
            }
        }

        // same round-trip check for UTF-32BE
        $maybe_utf32be = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32BE');
            if ($test3 === $test) {
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }
                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf32be;
                    }
                }
            }
        }

        // the byte order with more hits wins, a tie means "not UTF-32"
        if ($maybe_utf32be !== $maybe_utf32le) {
            if ($maybe_utf32le > $maybe_utf32be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
4041
4042
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked. Arrays are checked element by
     *                                         element (recursively).</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>For arrays: true only if every element passes the check.</p>
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // the elements are only read, so iterate by value: a by-reference loop
            // would leave a dangling reference behind
            foreach ($str as $v) {
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4072
4073
    /**
     * Decodes a JSON string.
     *
     * The input is passed through UTF8::filter() before it is handed to \json_decode().
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth. Values below 1 are raised to 1.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, passed through to \json_decode().
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the json extension is not available
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in appropriate PHP type.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // the nesting depth has to be at least 1
        $safe_depth = \max(1, $depth);

        return \json_decode($filtered, $assoc, $safe_depth, $options);
    }
4130
4131
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through UTF8::filter() before it is handed to \json_encode().
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>), passed through to \json_encode().
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Values below 1 are raised to 1.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the json extension is not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // the nesting depth has to be at least 1
        $safe_depth = \max(1, $depth);

        return \json_encode($filtered, $options, $safe_depth);
    }
4187
4188
    /**
     * Checks whether JSON is available on the server.
     *
     * The check looks for the global json_decode() function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $decoder_available = \function_exists('json_decode');

        return $decoder_available;
    }
4202
4203
    /**
     * Makes string's first char lowercase.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // every other encoding goes through the encoding-aware helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $rest = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $head . $rest;
        }

        $rest = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // plain mb_strtolower() is used when neither a language nor the length-preserving mode is requested
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($first_char) . $rest;
        }

        $head = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $head . $rest;
    }
4265
4266
    /**
     * Lowercase the first char of all words in the string.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Words that are kept unchanged.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // compare against '' explicitly: a truthiness check would also drop the string "0"
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty fragments only, a "0" fragment must stay in the result
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            } else {
                // listed exceptions are appended unchanged
                $words_str .= $word;
            }
        }

        return $words_str;
    }
4319
4320
    /**
     * Calculate Levenshtein distance between two strings.
     *
     * Both strings are remapped via ASCII::to_ascii_remap() and then compared
     * with the native \levenshtein() function.
     *
     * For better performance, in a real application with a single input string
     * matched against many strings from a database, you will probably want to pre-
     * encode the input only once and use \levenshtein().
     *
     * Source: https://github.com/KEINOS/mb_levenshtein
     *
     * @see https://www.php.net/manual/en/function.levenshtein
     *
     * @param string $str1            <p>One of the strings being evaluated for Levenshtein distance.</p>
     * @param string $str2            <p>One of the strings being evaluated for Levenshtein distance.</p>
     * @param int    $insertionCost   [optional] <p>Defines the cost of insertion.</p>
     * @param int    $replacementCost [optional] <p>Defines the cost of replacement.</p>
     * @param int    $deletionCost    [optional] <p>Defines the cost of deletion.</p>
     *
     * @return int
     */
    public static function levenshtein(
        string $str1,
        string $str2,
        int $insertionCost = 1,
        int $replacementCost = 1,
        int $deletionCost = 1
    ): int {
        $remapped = ASCII::to_ascii_remap($str1, $str2);

        $left = $remapped[0];
        $right = $remapped[1];

        return \levenshtein($left, $right, $insertionCost, $replacementCost, $deletionCost);
    }
4350
4351
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null strips whitespace,
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // an empty char list means "strip nothing"; building "^[]+" from it
        // would be an invalid pattern and wipe the whole string
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // mb_ereg patterns have no delimiter, the fallback path quotes "/" as well
            /** @noinspection PregQuoteUsageInspection */
            $quoted = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            $pattern = '^[' . $quoted . ']+';
        } else {
            $pattern = '^[\\s]+';
        }

        if ($use_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
4390
4391
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param string|string[] $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        // an array of strings is treated like one long string
        $text = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($text);
        if ($codepoints === []) {
            return null;
        }

        return self::chr((int) \max($codepoints));
    }
4417
4418
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte lengths of the given chars, 0 if there are no chars.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        // no chars, no width
        if ($sizes === []) {
            return 0;
        }

        return (int) \max($sizes);
    }
4440
4441
    /**
     * Tells whether the "mbstring" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4455
4456
    /**
     * Returns the UTF-8 character with the lowest code point found in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <p>A UTF-8 encoded string or an array of such strings
     *                             (array elements are concatenated before the lookup).</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point, or null if no code points were found
     *                     (e.g. empty input).</p>
     */
    public static function min($arg)
    {
        // An array of strings is treated as one long string.
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $code_points = self::codepoints($input);

        return $code_points === []
            ? null
            : self::chr((int) \min($code_points));
    }
4483
4484
    /**
     * Maps an encoding "name" onto its normalized spelling.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * Lookup order: a few exact aliases, the per-request cache, the list of known
     * encodings and finally a table of spelling variants (compared upper-cased and
     * without any non-alphanumeric characters). Names that are not recognised are
     * returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>
     *                      The $fallback (an empty string by default) is returned for an empty encoding name.</p>
     *
     * @template TNormalizeEncodingFallback
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
     * @phpstan-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // '' (and '0') -> nothing usable was given
        if (!$encoding) {
            return $fallback;
        }

        // exact (case-sensitive) matches for the most common names, checked before anything else
        $direct_aliases = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($direct_aliases[$encoding])) {
            return $direct_aliases[$encoding];
        }

        // only a fallback, for non "strict_types" usage (e.g. a bool that was casted to a string)
        if ($encoding === '1' || $encoding === '0') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // compare spelling variants: upper-case and without any separator characters
        $encoding_original = $encoding;
        $encoding = \strtoupper($encoding);
        $lookup_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown names are kept in their upper-cased form
        $encoding = $equivalences[$lookup_key] ?? $encoding;

        // remember the result under the spelling the caller used
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding_original] = $encoding;

        return $encoding;
    }
4649
4650
    /**
     * Standardize the line endings ("\r\n", "\r" and "\n") of a string, unix-like by default.
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
     *
     * The string is processed in a single pass, so an already replaced line ending is
     * never replaced again (this matters for a replacer like "\r\n", which itself
     * contains "\r" and "\n").
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here.<br>
     *                                  If an array is given, its first three values are used (in this order) for
     *                                  "\r\n", "\r" and "\n"; missing values are treated as an empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if (\is_array($replacer)) {
            // positional mapping: 1st value -> "\r\n", 2nd -> "\r", 3rd -> "\n"
            $replacements = \array_values($replacer);
            $map = [
                "\r\n" => (string) ($replacements[0] ?? ''),
                "\r"   => (string) ($replacements[1] ?? ''),
                "\n"   => (string) ($replacements[2] ?? ''),
            ];
        } else {
            $replacement = (string) $replacer;
            $map = [
                "\r\n" => $replacement,
                "\r"   => $replacement,
                "\n"   => $replacement,
            ];
        }

        // "strtr" tries the longest key first and does not re-scan replaced text,
        // unlike "str_replace" with an array of search strings, which works sequentially
        // and would therefore mangle the output for a replacer such as "\r\n".
        return \strtr($str, $map);
    }
4666
4667
    /**
     * Normalize some MS Word special characters.
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * This is a thin wrapper, the work is done by "ASCII::normalize_msword()".
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4683
4684
    /**
     * Normalize the whitespace.
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * This is a thin wrapper, all arguments are handed over unchanged to "ASCII::normalize_whitespace()".
     *
     * @param string $str                          <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
     *                                             bidirectional text chars.</p>
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }
4713
4714
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * Lookup order: per-request cache, the pre-computed "ord" data table, "IntlChar::ord()"
     * (if available) and finally a byte-wise calculation in plain PHP.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 if no byte could be read from the input (e.g. an empty string).<br>
     *             NOTE: the plain PHP fallback does not validate the continuation bytes.</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        $is_well_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_well_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // lazy-load the pre-computed lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $code_point = null;

        if (isset(self::$ORD[$chr])) {
            $code_point = self::$ORD[$chr];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            //
            // fallback via "IntlChar"
            //

            $intl_code = \IntlChar::ord($chr);
            // null (no result) and 0 are both handed over to the plain PHP fallback
            if ($intl_code !== null && $intl_code !== 0) {
                $code_point = $intl_code;
            }
        }

        if ($code_point === null) {
            //
            // fallback via vanilla php
            //

            // at most 4 bytes belong to one UTF-8 character; "unpack" returns a 1-based array
            /** @var int[] $bytes - "unpack": only false if the format string contains errors */
            $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
            $lead_byte = $bytes ? $bytes[1] : 0;

            if ($lead_byte >= 0xF0 && isset($bytes[4])) {
                // 4-byte sequence
                $code_point = (($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
            } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
                // 3-byte sequence
                $code_point = (($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
            } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
                // 2-byte sequence
                $code_point = (($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80;
            } else {
                // single byte (or nothing at all -> 0)
                $code_point = $lead_byte;
            }
        }

        $CHAR_CACHE[$cache_key] = $code_point;

        return $code_point;
    }
4799
4800
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string               $str        <p>The input string.</p>
     * @param array<string, mixed> $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool                 $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        $input = $clean_utf8 ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($input, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($input, $result);

        // success means: the parser did not fail AND something was extracted
        return $parsed !== false && $result !== [];
    }
4841
4842
    /**
     * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        // An empty pattern with the "u" modifier only compiles if PCRE was built with Unicode support;
        // the warning of a failing compilation is suppressed on purpose.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
4858
4859
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $step is not a number or not positive
     * @throws \RuntimeException         if $use_ctype is requested but "ext-ctype" is not available
     *
     * @return string[]
     *                  <p>The characters of the range; an empty array if a boundary is empty
     *                  or resolves to the code point 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        // empty boundaries (including 0 / '0') are not supported
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // The "ctype_*" functions interpret integers as ASCII codes (and passing non-strings is
        // deprecated since PHP 8.1), so both boundaries are always checked as strings.
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        // resolve the start code point
        if ($use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string)) {
            $is_digit = true;
            $start = (int) $var1;
        } elseif ($use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // not a number: treat the boundary as a UTF-8 character
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        // resolve the end code point, using the same interpretation as for the start
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4952
4953
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass is always executed; with $multi_decode the passes are
        // repeated until the string does not change anymore.
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entity_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = \rawurldecode($entity_decoded);
        } while ($multi_decode && $previous !== $str);

        return self::fix_simple_utf8($str);
    }
5013
5014
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always executed with the "u" (UTF-8) modifier.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the option string "msr" is mapped onto the modifiers "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5049
5050
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * Every known BOM is checked once, in the order of the internal BOM list, against the
     * (possibly already shortened) start of the string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_length = \strlen($str);

        foreach (self::$BOM as $bom => $bom_length) {
            // the string does not start with this BOM -> try the next one
            if (\strncmp($str, $bom, $bom_length) !== 0) {
                continue;
            }

            /** @var false|string $without_bom - needed for PhpStan (stubs error) */
            $without_bom = \substr($str, $bom_length, $remaining_length);
            if ($without_bom === false) {
                return '';
            }

            $remaining_length -= $bom_length;
            $str = (string) $without_bom;
        }

        return $str;
    }
5085
5086
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Directly repeated occurrences of every $what string are collapsed into a single one.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_array($what)) {
            foreach ($what as $item) {
                // The replacement is the back-reference to the captured occurrence and NOT $item itself:
                // "preg_replace" interprets "$n", "\n" and backslashes inside a replacement string, so an
                // $item like '$0' or '\\' would otherwise be replaced by something else than itself.
                $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
            }
        }

        return $str;
    }
5116
5117
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
     *                               should not be stripped. Default: '' (strip all tags)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $without_tags = \strip_tags($str, $allowable_tags);

        return $without_tags;
    }
5134
5135
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * EXAMPLE: <code>UTF8::remove_html_breaks("foo\r\nbar<br />baz", ' '); // 'foo bar baz'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" has to be the first alternative, so that a Windows line break counts as ONE break.
        // (A stray "/" in front of it used to turn that alternative into "slash + CRLF".)
        //
        // Modifiers: i = case-insensitive ("<BR>"), s = dot matches newlines, U = ungreedy,
        // so that "<br.*>" ends at the first ">".
        // NOTE(review): "<br.*" also matches other tags starting with "br" (e.g. "<brand>") - confirm
        // whether that is intended before tightening the pattern.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
5150
5151
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * This is a thin wrapper, all arguments are handed over unchanged to
     * "ASCII::remove_invisible_characters()".
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        $cleaned = ASCII::remove_invisible_characters(
            $str,
            $url_encoded,
            $replacement,
            $keep_basic_control_characters
        );

        return $cleaned;
    }
5188
5189
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_left('κόσμε-foo', 'κόσμε'); // '-foo'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a plain truthiness check would treat the
        // prefix "0" like an empty string and never remove it.
        if (
            $substring !== ''
            &&
            \strpos($str, $substring) === 0
        ) {
            if ($encoding === 'UTF-8') {
                // cut off as many characters as the prefix is long
                return (string) \mb_substr(
                    $str,
                    (int) \mb_strlen($substring)
                );
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr(
                $str,
                (int) self::strlen($substring, $encoding),
                null,
                $encoding
            );
        }

        // the string does not start with the prefix -> nothing to do
        return $str;
    }
5230
5231
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix test itself is a byte-wise comparison; only the length of the
     * part that is kept is measured in characters of the given encoding.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a plain truthiness check treats the
        // suffix "0" as empty and would never remove it.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5269
5270
    /**
     * Replaces every occurrence of $search in $str with $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-sensitive replacement is handled natively, the case-insensitive
        // one by the class' own str_ireplace().
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5295
5296
    /**
     * Replaces every occurrence of each element of $search in $str with $replacement.
     *
     * @param string          $str            <p>The input string.</p>
     * @param string[]        $search         <p>The elements to search for.</p>
     * @param string|string[] $replacement    <p>The string to replace with.</p>
     * @param bool            $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-sensitive replacement is handled natively, the case-insensitive
        // one by the class' own str_ireplace().
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5321
5322
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if ($replacement_char === '') {
                // "none" makes mbstring drop invalid sequences instead of substituting them
                $replacement_char_helper = 'none';
            } else {
                // mb_substitute_character() expects a Unicode code point, but \ord() only
                // returns the first byte, which is wrong for a multi-byte replacement
                // (e.g. "¿"). Resolve the real code point and fall back to the byte value
                // when the result is not a usable code point.
                $replacement_char_helper = (int) self::ord($replacement_char);
                if (
                    $replacement_char_helper <= 0
                    ||
                    $replacement_char_helper > 0x10FFFF
                    ||
                    ($replacement_char_helper >= 0xD800 && $replacement_char_helper <= 0xDFFF)
                ) {
                    $replacement_char_helper = \ord($replacement_char);
                }
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();

            try {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
                @\mb_substitute_character($replacement_char_helper);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the global substitute character, even if the conversion throws
                \mb_substitute_character($save);
            }
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
5381
5382
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // The "/" delimiter is only quoted for the regex_replace() fallback,
            // the mb_ereg_replace() pattern has no delimiter.
            /** @noinspection PregQuoteUsageInspection */
            $chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        if ($use_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5422
5423
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the
     * support table. The block is always returned and additionally echoed
     * when $useEcho is true.
     *
     * @param bool $useEcho <p>Echo the generated output as well. Default: true</p>
     *
     * @return string
     *                <p>The generated HTML.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $html = '<pre>';
        // Read-only iteration: no reference is needed (and none is left behind
        // on the shared static support table).
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5449
5450
    /**
5451
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
5452
     *
5453
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
5454
     *
5455
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
5456
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</>
5457
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
5458
     *
5459
     * @psalm-pure
5460
     *
5461
     * @return string
5462
     *                <p>The HTML numbered entity for the given character.</p>
5463
     */
5464 2
    public static function single_chr_html_encode(
5465
        string $char,
5466
        bool $keep_ascii_chars = false,
5467
        string $encoding = 'UTF-8'
5468
    ): string {
5469 2
        if ($char === '') {
5470 2
            return '';
5471
        }
5472
5473
        if (
5474 2
            $keep_ascii_chars
5475
            &&
5476 2
            ASCII::is_ascii($char)
5477
        ) {
5478 2
            return $char;
5479
        }
5480
5481 2
        return '&#' . self::ord($char, $encoding) . ';';
5482
    }
5483
5484
    /**
     * Replaces every run of $tab_length consecutive spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // one "tab" worth of spaces
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5504
5505
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first character, then drop leading dashes / underscores
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the native mb_* functions are only used when no language specific handling is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-case helper shared by both callbacks below.
         *
         * @param string $text
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $text, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($text, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        // remove separators and upper-case the character that follows them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                if (!isset($match[1])) {
                    return '';
                }

                return $to_upper($match[1], false);
            },
            $str
        );

        // upper-case the character that follows a run of digits
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5598
5599
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);

        // first pass works on space separated parts, second pass on dash separated parts
        $space_pass = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_pass, '-');
    }
5620
5621
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        if (\PHP_VERSION_ID < 80000) {
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
5651
5652
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles list or an empty needle always
     * yields false.
     *
     * @param string   $haystack       <p>The input string.</p>
     * @param scalar[] $needles        <p>SubStrings to look for.</p>
     * @param bool     $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // explicit empty check: a truthiness test would also reject the needle "0"
            if ($needle === '') {
                return false;
            }

            // exactly one lookup per needle: the case-sensitive check must not
            // fall through into the case-insensitive one
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5689
5690
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped; an empty $haystack or an empty $needles list
     * always yields false.
     *
     * @param string   $haystack       <p>The input string.</p>
     * @param scalar[] $needles        <p>SubStrings to look for.</p>
     * @param bool     $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains $needle.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // explicit empty check: a truthiness test would also skip the needle "0"
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5733
5734
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // dasherizing is delimiting with a dash
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5750
5751
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // both code paths perform the same three steps, only the regex engine differs
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // 1.) mark every upper-case character that is not at a word boundary with a dash
        if ($has_mbstring) {
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // 2.) lower-case the string (before the delimiter is inserted)
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        if ($use_mb_functions && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // 3.) replace every run of dashes, underscores and whitespace with the delimiter
        if ($has_mbstring) {
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5802
5803
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        $has_bom = self::string_has_bom($str);
        if (self::is_binary($str, !$has_bom)) {
            $utf32_result = self::is_utf32($str, false);
            if ($utf32_result === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_result === 2) {
                return 'UTF-32BE';
            }

            $utf16_result = self::is_utf16($str, false);
            if ($utf16_result === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_result === 2) {
                return 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding(
                $str,
                [
                    'ISO-8859-1',
                    'ISO-8859-2',
                    'ISO-8859-3',
                    'ISO-8859-4',
                    'ISO-8859-5',
                    'ISO-8859-6',
                    'ISO-8859-7',
                    'ISO-8859-8',
                    'ISO-8859-9',
                    'ISO-8859-10',
                    'ISO-8859-13',
                    'ISO-8859-14',
                    'ISO-8859-15',
                    'ISO-8859-16',
                    'WINDOWS-1251',
                    'WINDOWS-1252',
                    'WINDOWS-1254',
                    'CP932',
                    'CP936',
                    'CP950',
                    'CP866',
                    'CP850',
                    'CP51932',
                    'CP50220',
                    'CP50221',
                    'CP50222',
                    'ISO-2022-JP',
                    'ISO-2022-KR',
                    'JIS',
                    'JIS-ms',
                    'EUC-CN',
                    'EUC-JP',
                ],
                true
            );
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5933
5934
    /**
     * Check if the string ends with the given substring.
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        // ... but an empty string cannot end with a non-empty needle
        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
5966
5967
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - the comparison is done byte-wise
     * - an empty substring only matches an empty $str
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // Read-only iteration: iterating by reference is not needed here.
        foreach ($substrings as $substring) {
            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
5994
5995
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // byte-wise prefix comparison
        $has_prefix = $substring !== ''
                      &&
                      \strncmp($str, $substring, \strlen($substring)) === 0;

        return $has_prefix ? $str : $substring . $str;
    }
6018
6019
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // byte-wise suffix comparison, only meaningful for two non-empty strings
        $has_suffix = $str !== ''
                      &&
                      $substring !== ''
                      &&
                      \substr($str, -\strlen($substring)) === $substring;

        if ($has_suffix) {
            return $str;
        }

        return $str . $substring;
    }
6043
6044
    /**
     * Turns an identifier-like string into a more readable one: every '_id'
     * is removed, the remaining underscores become spaces, surrounding
     * whitespace is trimmed and the result is passed through self::ucfirst().
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: '_id' has to be removed before the bare '_' is replaced
        $search = ['_id', '_'];
        $replace = ['', ' '];

        $humanized = \trim(\str_replace($search, $replace, $str));

        return self::ucfirst($humanized);
    }
6070
6071
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * An empty $needle always yields true; an empty $haystack combined with a
     * non-empty $needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // take as many trailing bytes as the needle has and compare them
        // case-insensitively
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6098
6099
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * Each candidate is checked with self::str_iends_with(); the first match
     * ends the search.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with one of the $substrings.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: the list is only read, so a by-reference loop
        // would just force a separation of the caller's array and leave a
        // dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        // no candidate matched (this also covers an empty $substrings list)
        return false;
    }
6126
6127
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * If $index is greater than the length of $str, the string is returned
     * unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // fast path: plain "mb_*" calls for the default encoding
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
6168
6169
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * The search strings are quoted and applied as case-insensitive unicode
     * regular expressions. The replacement is always inserted literally:
     * sequences such as "$0" or "\1" are NOT interpreted as back-references.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     An empty search string never matches.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements.</p>
     *
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // build one regex per search string (keys and order are kept)
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // a pattern that can never match, so an empty needle replaces nothing
                $patterns[$key] = '/^(?<=.)$/';
            } else {
                $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }

        // preg_replace() treats "\" and "$" in the replacement as the start of
        // a back-reference; escape both so the text is inserted verbatim, as
        // str_ireplace() would do.
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacement = \is_array($replacement)
            ? \array_map($escape, $replacement)
            : $escape($replacement);

        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6236
6237
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The prefix comparison is done with \strncasecmp(), i.e. byte-wise;
     * presumably only ASCII letters are matched case-insensitively.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // NOTE(review): for an empty $search the replacement is appended,
            // not prepended - kept as-is, confirm this is intended.
            return $str . $replacement;
        }

        $search_length = \strlen($search);
        if (\strncasecmp($str, $search, $search_length) !== 0) {
            // $str does not start with $search: nothing to replace
            return $str;
        }

        return $replacement . \substr($str, $search_length);
    }
6272
6273
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The suffix comparison is done with \stripos(), i.e. byte-wise;
     * presumably only ASCII letters are matched case-insensitively.
     *
     * If $search is empty, $replacement is appended. If $search is longer than
     * $str, it can not be a suffix and $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = \strlen($str);
        $search_length = \strlen($search);

        // Guard before calling stripos(): with a longer $search the offset
        // below would be negative beyond the start of $str, which is rejected
        // by stripos() (ValueError on PHP 8, warning on PHP 7).
        if ($search_length > $str_length) {
            return $str;
        }

        // only the last $search_length bytes are inspected, so a hit means
        // that $str ends with $search
        if (\stripos($str, $search, $str_length - $search_length) !== false) {
            $str = \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
6307
6308
    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * An empty $needle always yields true; an empty $haystack combined with a
     * non-empty $needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        // the needle is a prefix when its first case-insensitive hit is at position 0
        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
6335
6336
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * Each candidate is cast to string and checked with
     * self::str_istarts_with(); the first match ends the search. An empty
     * $str never matches.
     *
     * @param string   $str        <p>The input string.</p>
     * @param scalar[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the $substrings.</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        // Iterate by value: the list is only read, so a by-reference loop
        // would just force a separation of the caller's array and leave a
        // dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, (string) $substring)) {
                return true;
            }
        }

        // no candidate matched (this also covers an empty $substrings list)
        return false;
    }
6367
6368
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * The separator is searched case-insensitively. An empty string is
     * returned if $str or $separator is empty, or if the separator is not
     * found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // The offset has to be measured in the same encoding that is used to
        // cut the string below, hence $encoding is forwarded to the search.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6407
6408
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * The separator is searched case-insensitively. An empty string is
     * returned if $str or $separator is empty, or if the separator is not
     * found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // The offset has to be measured in the same encoding that is used to
        // cut the string below, hence $encoding is forwarded to the search.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6447
6448
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * The separator is searched case-insensitively. An empty string is
     * returned if $str or $separator is empty, or if the separator is not
     * found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // The offset has to be measured in the same encoding that is used to
        // cut the string below, hence $encoding is forwarded to the search.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6479
6480
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * The separator is searched case-insensitively. An empty string is
     * returned if $str or $separator is empty, or if the separator is not
     * found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // plain "mb_*" calls for the default encoding, class helpers otherwise
        $is_utf8 = $encoding === 'UTF-8';

        $offset = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        return $is_utf8
            ? (string) \mb_substr($str, 0, $offset)
            : (string) self::substr($str, 0, $offset, $encoding);
    }
6516
6517
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * The work is delegated to self::stristr(). An empty string is returned if
     * $str or $needle is empty, or if self::stristr() reports no match.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $before_needle, $encoding);

        // "false" (= not found) is mapped to an empty string
        return $part === false ? '' : $part;
    }
6555
6556
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * The work is delegated to self::strrichr(). An empty string is returned if
     * $str or $needle is empty, or if self::strrichr() reports no match.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        // "false" (= not found) is mapped to an empty string
        return $part === false ? '' : $part;
    }
6594
6595
    /**
     * Returns the last $n characters of the string.
     *
     * An empty string is returned for an empty $str or if $n is not positive.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // a negative start counts from the end of the string
        return (string) \mb_substr($str, -$n);
    }
6623
6624
    /**
     * Limit the number of characters in a string.
     *
     * A string that is not longer than $length is returned unchanged.
     * Otherwise it is cut so that the kept part plus $str_add_on has $length
     * characters. If $str_add_on alone is already as long as (or longer than)
     * $length, nothing of $str is kept and only $str_add_on is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str is empty or $length is not positive.</p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never pass a negative length to the substring call: it would
            // cut from the END of the string and return (nearly) all of it.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // same clamp as above for the non default encodings
        $keep = \max(0, $length - (int) self::strlen($str_add_on));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6663
6664
    /**
     * Limit the number of characters in a string, but also after the next word.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * A string that is not longer than $length is returned unchanged. If the
     * character at position "$length - 1" is a space, the string is cut right
     * before it. Otherwise the first $length characters are taken and the
     * last (possibly incomplete) word is dropped; if nothing is left after
     * that, the first "$length - 1" characters are used instead. In all of
     * these cut cases $str_add_on is appended.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // the limit falls exactly behind a word
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $str = \mb_substr($str, 0, $length);
        } else {
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // the limit falls exactly behind a word
            if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = self::substr($str, 0, $length, $encoding);
            if ($str === false) {
                return '' . $str_add_on;
            }
        }

        // drop everything behind the last space of the truncated string
        $words = \explode(' ', $str, -1);
        $new_str = \implode(' ', $words);

        if ($new_str !== '') {
            return $new_str . $str_add_on;
        }

        // nothing left in front of a space: hard cut
        if ($is_utf8) {
            return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
        }

        return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
    }
6730
6731
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the start until
     * the first difference or until the shorter string ends.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1),
                \mb_strlen($str2)
            );

            $position = 0;
            while ($position < $limit) {
                $char = \mb_substr($str1, $position, 1);

                if ($char === false || $char !== \mb_substr($str2, $position, 1)) {
                    break;
                }

                $prefix .= $char;
                ++$position;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $position = 0;
        while ($position < $limit) {
            $char = self::substr($str1, $position, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, $position, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$position;
        }

        return $prefix;
    }
6794
6795
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // checked after the normalization, which may have changed $encoding
        $is_utf8 = $encoding === 'UTF-8';

        // Extract every character exactly once, instead of calling the
        // substring functions again for each cell of the table below.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $is_utf8
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $is_utf8
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        // $len: length of the best match so far, $end: where it ends in $str1
        $len = 0;
        $end = 0;

        // Only the previous row of the table is ever read, so two rows are
        // enough. Index 0 is the "empty prefix" column and always 0.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $str_char = $str_chars[$i - 1];
            $current_row = [0];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    // extend the common run that ended one character earlier
                    $run = $previous_row[$j - 1] + 1;

                    // strictly greater: on ties the first match is kept
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($is_utf8) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6885
6886
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the end until the
     * first difference or until the shorter string is used up.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // "-$distance" addresses the characters counted from the end
            $distance = 1;
            while ($distance <= $limit) {
                $char = \mb_substr($str1, -$distance, 1);

                if ($char === false || $char !== \mb_substr($str2, -$distance, 1)) {
                    break;
                }

                $suffix = $char . $suffix;
                ++$distance;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        // "-$distance" addresses the characters counted from the end
        $distance = 1;
        while ($distance <= $limit) {
            $char = self::substr($str1, -$distance, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, -$distance, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$distance;
        }

        return $suffix;
    }
6952
6953
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is given WITHOUT delimiters and modifiers; it is wrapped in
     * "/" delimiters and evaluated with the "u" (unicode) modifier. A "/"
     * inside the pattern may be written escaped ("\/") or unescaped.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // An unescaped "/" would end the regex early and turn the rest of the
        // pattern into (invalid) modifiers. Escape every "/" that is preceded
        // by an even number of backslashes (= not escaped yet); slashes that
        // are already escaped are left alone.
        $pattern = (string) \preg_replace('~(?<!\\\\)(?:\\\\\\\\)*\K/~', '\\/', $pattern);

        return (bool) \preg_match('/' . $pattern . '/u', $str);
    }
6968
6969
    /**
     * Tells whether a character exists at the given offset.
     *
     * A negative offset counts from the last character of the string
     * (-1 is the last character). Implements part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // a negative offset -n addresses the n-th character from the end
        if ($offset < 0) {
            return \abs($offset) <= $char_count;
        }

        return $offset < $char_count;
    }
6994
6995
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check must measure the string in the same encoding that
        // char_at() uses below, otherwise the two disagree for non-UTF-8 input.
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7027
7028
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string
     * is empty, or when the string already has at least $pad_length characters.
     * With STR_PAD_BOTH and an odd number of missing characters, the extra
     * character goes to the right side.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right', 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // fast path: a literal 'UTF-8' goes straight to the mb_* functions,
        // everything else is normalized and routed through the class helpers
        $use_mb = $encoding === 'UTF-8';
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $use_mb
            ? (int) \mb_strlen($str, 'UTF-8')
            : (int) self::strlen($str, $encoding);

        $diff = $pad_length - $str_length;
        if ($diff <= 0) {
            // already long enough, nothing to add
            return $str;
        }

        // number of padding characters for each side
        switch ($pad_type) {
            case \STR_PAD_LEFT:
                $left = $diff;
                $right = 0;

                break;

            case \STR_PAD_BOTH:
                $left = (int) \floor($diff / 2);
                $right = $diff - $left;

                break;

            case \STR_PAD_RIGHT:
            default:
                $left = 0;
                $right = $diff;
        }

        // never below 1, so the division in the builder cannot hit zero even
        // if the length lookup fails for an exotic encoding
        $ps_length = \max(
            1,
            $use_mb
                ? (int) \mb_strlen($pad_string, 'UTF-8')
                : (int) self::strlen($pad_string, $encoding)
        );

        /**
         * Builds exactly $length padding characters: repeat the pad string
         * just often enough, then cut it to size.
         *
         * @param int $length
         *
         * @psalm-pure
         *
         * @return string
         */
        $build = static function (int $length) use ($pad_string, $ps_length, $use_mb, $encoding): string {
            if ($length <= 0) {
                return '';
            }

            $repeated = \str_repeat($pad_string, (int) \ceil($length / $ps_length));

            if ($use_mb) {
                return (string) \mb_substr($repeated, 0, $length, 'UTF-8');
            }

            return (string) self::substr($repeated, 0, $length, $encoding);
        };

        return $build($left) . $str . $build($right);
    }
7195
7196
    /**
     * Pads both sides of the string until it reaches the given length.
     * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7224
7225
    /**
     * Pads the beginning of the string until it reaches the given length.
     * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7253
7254
    /**
     * Pads the end of the string until it reaches the given length.
     * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7282
7283
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7312
7313
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // plain delegation; $count is handed through by reference
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $result;
         */
        $result = \str_replace($search, $replace, $subject, $count);

        return $result;
    }
7372
7373
    /**
     * Replaces $search with $replacement when $str starts with $search.
     *
     * The comparison is done on raw bytes. An empty $search does not match
     * a prefix: in that case $replacement is appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // covers every empty-$search case, including an empty $str
        if ($search === '') {
            return $str . $replacement;
        }

        $search_bytes = \strlen($search);

        // no prefix match (this includes an empty $str): leave the input alone
        if (\substr($str, 0, $search_bytes) !== $search) {
            return $str;
        }

        return $replacement . \substr($str, $search_bytes);
    }
7411
7412
    /**
     * Replaces $search with $replacement when $str ends with $search.
     *
     * The comparison is done on raw bytes. An empty $search does not match
     * a suffix: in that case $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // covers every empty-$search case, including an empty $str
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // Compare the tail directly instead of calling strpos() with a computed
        // offset: when $search is much longer than $str that offset falls
        // outside the string, which strpos() rejects (ValueError on PHP 8).
        if (\substr($str, -$search_length) !== $search) {
            return $str;
        }

        return \substr($str, 0, -$search_length) . $replacement;
    }
7449
7450
    /**
     * Replace the first occurrence of "$search" in "$subject" with "$replace".
     *
     * The subject is returned unchanged when "$search" is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The term to insert instead.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strpos($subject, $search);

        if ($position === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, $search_length);
    }
7484
7485
    /**
     * Replace the last occurrence of "$search" in "$subject" with "$replace".
     *
     * The subject is returned unchanged when "$search" is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The term to insert instead.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strrpos($subject, $search);

        if ($position === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, $search_length);
    }
7518
7519
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses "shuffle()", a random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Both branches work the same way: shuffle the list of character
        // positions, then rebuild the string one character at a time.
        if ($encoding === 'UTF-8') {
            $positions = \range(0, (int) \mb_strlen($str) - 1);
            \shuffle($positions);

            $result = '';
            foreach ($positions as $position) {
                $char = \mb_substr($str, $position, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }

            return $result;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $positions = \range(0, (int) self::strlen($str, $encoding) - 1);
        \shuffle($positions);

        $result = '';
        foreach ($positions as $position) {
            $char = self::substr($str, $position, 1, $encoding);
            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
7566
7567
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative values for $start or $end are counted
     * from the end of the string.
     *
     * An empty string is returned when $end does not lie behind $start.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>The result comes straight from the
     *                      underlying substr call, which may yield <b>FALSE</b> if <i>str</i>
     *                      is shorter than <i>start</i> characters.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // fast path: a literal 'UTF-8' goes straight to the mb_* functions
        $use_mb = $encoding === 'UTF-8';
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $use_mb
            ? (int) \mb_strlen($str, 'UTF-8')
            : (int) self::strlen($str, $encoding);

        if ($end === null) {
            // "until the end": let the substr call deal with $start as given
            $start_index = $start;
            $length = $str_length;
        } else {
            // Resolve both bounds to absolute, non-negative positions first.
            // Otherwise a negative $start yields a length that is too large,
            // and an $end before $start yields a negative length, which the
            // substr functions read as "omit characters from the end".
            $start_index = $start < 0 ? \max(0, $str_length + $start) : $start;
            $end_index = $end < 0 ? $str_length + $end : $end;

            if ($end_index <= $start_index) {
                return '';
            }

            $length = $end_index - $start_index;
        }

        if ($use_mb) {
            return \mb_substr($str, $start_index, $length, 'UTF-8');
        }

        return self::substr($str, $start_index, $length, $encoding);
    }
7618
7619
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized and "-" becomes "_"; every number
     * character and uppercase letter is prefixed with "_" (ASCII digits are
     * additionally followed by "_", everything else is lowercased); whitespace
     * runs become "_"; repeated "_" are collapsed and trimmed from both ends.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // Numbers and uppercase letters only. Inside a character class "|"
            // is a literal pipe, not an alternation, so it must not be listed.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // ASCII digit: isolate it on both sides
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // No separate whitespace trim pattern is needed here: once every
        // whitespace run has become "_" there is none left to trim.
        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7688
7689
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        /** @var int[] $code_points */
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicate values
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7720
7721
    /**
     * Splits every entry of an array into an array of Unicode character chunks.
     *
     * Each entry is handed to "UTF8::str_split()" with the given options;
     * the keys of the input array are kept.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        /** @var string[][] $chunks */
        $chunks = [];

        foreach ($input as $key => $value) {
            $chunks[$key] = self::str_split(
                $value,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        return $chunks;
    }
7759
7760
    /**
     * Split a string into an array of unicode characters (or chunks of characters).
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * Strategy, in order of preference:
     * 1. native "mb_str_split()" (when mbstring is available and allowed),
     * 2. "mb_substr()" per character for short strings, PCRE for longer ones,
     * 3. PCRE with the "u" modifier,
     * 4. a manual byte-wise UTF-8 decoder that skips bytes which do not form a
     *    well-formed 1-4 byte sequence.
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element. Values <= 0 yield an empty array.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input.</p>
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // legacy behaviour: array input is still accepted and delegated
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /** @psalm-suppress InvalidReturnStatement */
            /** @phpstan-ignore-next-line - old code :/ */
            return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        $input = (string) $input;
        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        $chars = [];

        if ($try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true) {
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $native_result = \mb_str_split($input, $length);
                if ($native_result !== false) {
                    // the native function already honours $length
                    return $native_result;
                }
            }

            $char_count = \mb_strlen($input);
            if ($char_count > 127) {
                $matches = [];
                \preg_match_all('/./us', $input, $matches);
                $chars = $matches[0] ?? [];
            } else {
                for ($pos = 0; $pos < $char_count; ++$pos) {
                    $chars[] = \mb_substr($input, $pos, 1);
                }
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $matches = [];
            \preg_match_all('/./us', $input, $matches);
            $chars = $matches[0] ?? [];
        } else {
            // last resort: decode the UTF-8 byte sequences by hand
            $byte_count = \strlen($input);

            for ($pos = 0; $pos < $byte_count; ++$pos) {
                $lead = $input[$pos];

                // 0xxxxxxx => single byte (ASCII)
                if (($lead & "\x80") === "\x00") {
                    $chars[] = $lead;

                    continue;
                }

                // 110xxxxx => two byte sequence
                if (isset($input[$pos + 1]) && ($lead & "\xE0") === "\xC0") {
                    if (($input[$pos + 1] & "\xC0") === "\x80") {
                        $chars[] = $lead . $input[$pos + 1];
                        ++$pos;
                    }

                    continue;
                }

                // 1110xxxx => three byte sequence
                if (isset($input[$pos + 2]) && ($lead & "\xF0") === "\xE0") {
                    if (
                        ($input[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$pos + 2] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $lead . $input[$pos + 1] . $input[$pos + 2];
                        $pos += 2;
                    }

                    continue;
                }

                // 11110xxx => four byte sequence
                if (isset($input[$pos + 3]) && ($lead & "\xF8") === "\xF0") {
                    if (
                        ($input[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$pos + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$pos + 3] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $lead . $input[$pos + 1] . $input[$pos + 2] . $input[$pos + 3];
                        $pos += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // glue the single characters back together in groups of $length
            $chunks = [];
            foreach (\array_chunk($chars, $length) as $chunk) {
                $chunks[] = \implode('', $chunk);
            }

            return $chunks;
        }

        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }
7913
7914
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * The pattern is written without delimiters (as for "mb_split()"). When
     * mbstring is not available the same pattern is handed to PCRE, so both
     * code paths interpret $pattern as a regular expression.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings; an empty array if $limit is 0 or the split failed.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $parts = \mb_split($pattern, $str);
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first $limit pieces (the rest is dropped, not appended)
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // Escape bare "/" so the pattern can be wrapped in "/.../u"; existing
        // escape sequences (including "\/") are passed through untouched.
        $pcre_pattern = (string) \preg_replace_callback(
            '~\\\\.|/~s',
            static function (array $token): string {
                return $token[0] === '/' ? '\\/' : $token[0];
            },
            $pattern
        );

        // ask for one extra piece so the unsplit remainder can be discarded below
        $parts = \preg_split('/' . $pcre_pattern . '/u', $str, $limit > 0 ? $limit + 1 : -1);
        if ($parts === false) {
            return [];
        }

        return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
    }
7983
7984
    /**
     * Check if the string starts with the given substring (case-sensitive, byte-wise).
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>true for an empty needle; false for an empty haystack with a non-empty needle.</p>
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // the empty needle wins over the empty haystack
        if ($needle === '' || $haystack === '') {
            return $needle === '';
        }

        if (\PHP_VERSION_ID < 80000) {
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
8016
8017
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param scalar[] $substrings <p>Substrings to look for; each one is cast to string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring; always false for an empty
     *              $str or an empty list.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so no reference into the caller's array is needed
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, (string) $substring)) {
                return true;
            }
        }

        return false;
    }
8048
8049
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything behind the separator, or an empty string if the input or the
     *                separator is empty, or the separator was not found.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: plain mbstring calls
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class wrapper here, like the sibling "str_substr_*_separator" methods do
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8090
8091
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything behind the last separator, or an empty string if the input or
     *                the separator is empty, or the separator was not found.</p>
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        $separator_pos = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($separator_pos === false) {
            return '';
        }

        if ($is_utf8) {
            $tail = \mb_substr($str, $separator_pos + (int) \mb_strlen($separator));
        } else {
            $tail = self::substr(
                $str,
                $separator_pos + (int) self::strlen($separator, $encoding),
                null,
                $encoding
            );
        }

        return (string) $tail;
    }
8135
8136
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything in front of the first separator, or an empty string if the
     *                input or the separator is empty, or the separator was not found.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        $separator_pos = $is_utf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($separator_pos === false) {
            return '';
        }

        $head = $is_utf8
            ? \mb_substr($str, 0, $separator_pos)
            : self::substr($str, 0, $separator_pos, $encoding);

        return (string) $head;
    }
8181
8182
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything in front of the last separator, or an empty string if the
     *                input or the separator is empty, or the separator was not found.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        $separator_pos = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($separator_pos === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $separator_pos);
        }

        // the encoding name is only normalized for the final cut, after the search
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $separator_pos, $normalized_encoding);
    }
8226
8227
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Without "$before_needle" the returned part starts with the needle itself;
     * with "$before_needle" the needle is not part of the result.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The matching part, or an empty string if nothing was found or an
     *                argument was empty.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8273
8274
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Without "$before_needle" the returned part starts with the needle itself;
     * with "$before_needle" the needle is not part of the result.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The matching part, or an empty string if nothing was found or an
     *                argument was empty.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8320
8321
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
8336
8337
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that is neither whitespace nor one of
     * the characters in $word_define_chars. Words listed in $ignore are
     * returned untouched; all other words are lower-cased with an upper-cased
     * first character.
     *
     * @param string        $str
     * @param string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                     null. Default: null</p>
     * @param string        $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool          $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                     string.</p>
     * @param string|null   $lang                          [optional] <p>Set the language for special cases: az,
     *                                                     el, lt, tr</p>
     * @param bool          $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                     e.g. ẞ -> ß</p>
     * @param bool          $use_trim_first                [optional] <p>true === trim the input string,
     *                                                     first</p>
     * @param string|null   $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                     whitespace separator === words.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain mb_* calls are only safe without language / length special cases
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // compare explicitly: a truthiness check would discard the valid separator "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8432
8433
    /**
     * Convert a string into an obfuscated string.
     *
     * Roughly "$percent" of the characters are picked at random; every picked
     * character that is not listed in "$keepChars" is replaced by
     * "$obfuscateChar". Picked characters from "$keepChars" count towards the
     * quota but stay unchanged. The result is random, so two calls with the
     * same arguments usually differ.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('user@example.com', 0.5, '*', ['@', '.']); // e.g. "u**r@e*am*le.c*m"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of characters to pick (0.0 - 1.0); larger values are
     *                                treated like 1.0.</p>
     * @param string   $obfuscateChar
     * @param string[] $keepChars
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscated string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // hide already existing obfuscate-chars behind a placeholder, restored at the end
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);

        // Only characters that differ from the obfuscate-char can ever be picked.
        // Never ask for more picks than that, otherwise the loop below cannot terminate
        // (e.g. $percent > 1 or $obfuscateChar identical to the placeholder).
        $charsSelectable = 0;
        foreach ($chars as $char) {
            if ($char !== $obfuscateChar) {
                ++$charsSelectable;
            }
        }
        $charsMaxChange = \min(\round($charsMax * $percent), $charsSelectable);

        $charsCounter = 0;
        $charKeyDone = [];

        // keep sweeping over the string until enough characters have been picked
        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // coin flip: skip roughly every second candidate per sweep
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
8499
8500
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. These words are treated like the built-in "small words"
     * (a, an, and, ...) and are matched literally, i.e. regex meta characters
     * inside them have no special meaning.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string   $str
     * @param string[] $ignore   <p>An array of words not to capitalize.</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // built-in small words, written as regex fragments
        $small_words = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        // Caller-supplied words are plain text, not regex fragments: quote them so that
        // characters like "(" or "~" cannot break the patterns below (a broken pattern
        // would make preg_replace_callback() fail and the method return '').
        foreach ($ignore as $ignored_word) {
            $small_words[] = \preg_quote((string) $ignored_word, '~');
        }

        $small_words_rx = \implode('|', $small_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lowercase char (e.g. "ALL CAPS") is lower-cased first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= static::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . static::ucfirst($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return static::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return static::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below tests for "…" although its inline comment talks
        // about a hyphen - kept unchanged here, confirm against the reference script.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . static::ucfirst($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
8690
8691
    /**
     * Get a binary representation of a specific string.
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * INFO: Leading zero bits are stripped from the result. An input without any
     * set bit (including the empty string) yields '0'.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        if ($str === '') {
            return '0';
        }

        /** @var array|false $bytes - needed for PhpStan (stubs error) */
        $bytes = \unpack('C*', $str);
        if ($bytes === false) {
            return false;
        }

        // Convert byte by byte instead of using "base_convert()" on the whole
        // hex dump: "base_convert()" loses precision on large numbers, so longer
        // strings would produce a wrong bit sequence.
        $bits = '';
        foreach ($bytes as $byte) {
            $bits .= \sprintf('%08b', $byte);
        }

        // Keep the historic output format: no leading zeros, but never empty.
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8714
8715
    /**
     * Split a string into an array of lines.
     *
     * INFO: A separator is a run of one or two "\r" / "\n" characters, so "\r\n"
     * and also e.g. "\n\n" are each consumed as a single line break.
     *
     * @param string   $str                 <p>The input string.</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The lines; for an empty input (or a failed split) either
     *                  [''] or, with $remove_empty_values, [].</p>
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result used whenever there is nothing to split
        $nothing_to_split = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing_to_split;
        }

        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing_to_split;
        }

        $filter_requested = $remove_empty_values || $remove_short_values !== null;
        if (!$filter_requested) {
            return $lines;
        }

        return self::reduce_string_array($lines, $remove_empty_values, $remove_short_values);
    }
8754
8755
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result alternates
     * between "non-word" parts (even indexes) and the words themselves (odd indexes).
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str                 <p>The input string.</p>
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // character class for "word" chars: letters plus the extra chars from the caller
        $char_list = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $remove_empty_values ? [] : [''];
        }

        if (!$remove_empty_values && $remove_short_values === null) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every remaining entry is a string
        foreach ($reduced as $key => $value) {
            $reduced[$key] = (string) $value;
        }

        return $reduced;
    }
8806
8807
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: If $substring alone is already longer than $length (or $length is
     * negative), nothing of $str is kept and only $substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: use the "mb_*" functions directly
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);
            }

            // A negative length would be interpreted as "count from the end of
            // the string" and could produce a result longer than requested.
            if ($length < 0) {
                $length = 0;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
        }

        // same guard as in the UTF-8 fast path
        if ($length < 0) {
            $length = 0;
        }

        return (string) self::substr($str, 0, $length, $encoding) . $substring;
    }
8866
8867
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * INFO: Only the space character (' ') is treated as a word separator here.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default: ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>If true, a truncated part that
     *                                                       contains no space at all is kept (i.e. the
     *                                                       single word is split). Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // reserve room for the substring that gets appended
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $head = self::substr($str, 0, $length, $encoding);
            if ($head === false) {
                return '';
            }

            // the cut is only "clean" if the very next char is a space
            $next_space = self::strpos($str, ' ', $length - 1, $encoding);
            if ($next_space !== $length) {
                // last space inside of the kept part, we cut back to it
                $last_space = self::strrpos($head, ' ', 0, $encoding);

                $cut_back = $last_space !== false
                            ||
                            ($next_space !== false && !$ignore_do_not_split_words_for_one_word);

                if ($cut_back) {
                    $head = (string) self::substr($head, 0, (int) $last_space, $encoding);
                }
            }

            return $head . $substring;
        }

        // UTF-8 fast path via "mb_*" functions

        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $head - needed for PhpStan (stubs error) */
        $head = \mb_substr($str, 0, $length);
        if ($head === false) {
            return '';
        }

        // the cut is only "clean" if the very next char is a space
        $next_space = \mb_strpos($str, ' ', $length - 1);
        if ($next_space !== $length) {
            // last space inside of the kept part, we cut back to it
            $last_space = \mb_strrpos($head, ' ', 0);

            $cut_back = $last_space !== false
                        ||
                        ($next_space !== false && !$ignore_do_not_split_words_for_one_word);

            if ($cut_back) {
                $head = (string) \mb_substr($head, 0, (int) $last_space);
            }
        }

        return $head . $substring;
    }
8973
8974
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: This is a shortcut for UTF8::str_delimit() with "_" as delimiter,
     * the actual conversion is done there.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
8991
8992
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: The string is first passed to UTF8::str_camelize() (with $str and
     * $encoding only), the remaining options are applied by UTF8::ucfirst().
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9019
9020
    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string, or the words themselves
     *                      (depending on $format).</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the words are on the odd indexes, the text between them on the even ones
        $parts = self::str_to_words($str, $char_list);
        $total = \count($parts);

        if ($format === 1) {
            $words = [];
            $index = 1;
            while ($index < $total) {
                $words[] = $parts[$index];
                $index += 2;
            }

            return $words;
        }

        if ($format === 2) {
            $words = [];
            // the first word starts after the leading "non-word" part
            $position = (int) self::strlen($parts[0]);
            for ($index = 1; $index < $total; $index += 2) {
                $words[$position] = $parts[$index];
                // skip the word itself and the separator which follows it
                $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
            }

            return $words;
        }

        return (int) (($total - 1) / 2);
    }
9077
9078
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(); both strings are passed
     *       through UTF8::strtocasefold() before they are compared.
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_str1, $folded_str2);
    }
9120
9121
    /**
     * Case-sensitive string comparison.
     *
     * INFO: Both strings are normalized (NFD) before the byte-wise comparison, so
     *       canonically equivalent strings compare as equal. A string which can
     *       not be normalized (e.g. invalid UTF-8) is compared as it is.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized_str1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized_str2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" returns false on error, which must not be
        // passed into "\strcmp()", so we fall back to the original input.
        return \strcmp(
            \is_string($normalized_str1) ? $normalized_str1 : $str1,
            \is_string($normalized_str2) ? $normalized_str2 : $str2
        );
    }
9149
9150
    /**
     * Find length of initial segment not matching mask.
     *
     * @param string   $str       <p>The input string.</p>
     * @param string   $char_list <p>The chars to look for; with an empty list the length of $str is returned.</p>
     * @param int      $offset    [optional] <p>Start of the part of $str which is examined.</p>
     * @param int|null $length    [optional] <p>Length of the part of $str which is examined.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of chars before the first char out of $char_list.</p>
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // cut out the requested part of the string first
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $part = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $part = \mb_substr($str, $offset);
            } else {
                $part = \mb_substr($str, $offset, $length);
            }

            if ($part === false) {
                return 0;
            }

            $str = $part;
        }

        if ($str === '') {
            return 0;
        }

        // capture (non-greedy) everything in front of the first char from the list
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
9212
9213
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        // every value is cast to int and written as a numeric HTML entity
        $entities = \implode(
            '',
            \array_map(
                static function ($code_point): string {
                    return '&#' . (int) $code_point . ';';
                },
                $code_points
            )
        );

        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        //
        // NOTE(review): "HTML-ENTITIES" is deprecated for the mbstring functions as of PHP 8.2 — confirm before upgrading.
        return \mb_convert_encoding($entities, 'UTF-8', 'HTML-ENTITIES');
    }
9248
9249
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function string_has_bom(string $str): bool
    {
        // Iterate by value: the BOM list is shared static data and is only read
        // here, so there is no need to bind a reference to its elements.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // binary safe comparison of the first bytes with the current BOM
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9274
9275
    /**
     * Strip HTML and PHP tags from a string + (optionally) clean invalid UTF-8.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>", null, true); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        $input = $clean_utf8 ? self::clean($str) : $str;

        // do not pass "null" into the native function
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9319
9320
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without any whitespace.</p>
     */
    public static function strip_whitespace(string $str): string
    {
        // nothing to do for an empty string, otherwise remove every run of whitespace
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
9341
9342
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // an empty needle is only "found" (at position 0) as of PHP 8
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl: "grapheme_stripos()" can't handle other encodings
        // and it can't handle a negative offset
        //

        $grapheme_usable = $encoding === 'UTF-8'
                           &&
                           $offset >= 0
                           &&
                           self::$SUPPORT['intl'] === true;
        if ($grapheme_usable) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings and search case-sensitive
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9431
9432
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // an empty needle is only "found" as of PHP 8
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            return $is_php8 ? $haystack : false;
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($has_mbstring && $encoding === 'UTF-8') {
            return \mb_stristr($haystack, $needle, $before_needle);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($has_mbstring) {
            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        if ($encoding === 'UTF-8') {
            // INFO: "grapheme_stristr()" can't handle other encodings
            if (self::$SUPPORT['intl'] === true) {
                $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
                if ($grapheme_result !== false) {
                    return $grapheme_result;
                }
            }
        } elseif (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via regex: capture (non-greedy) everything in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $before = $found[1];
        if ($before_needle) {
            return $before;
        }

        // the rest of the haystack, starting at the needle
        return self::substr($haystack, (int) self::strlen($before, $encoding), null, $encoding);
    }
9537
9538
    /**
9539
     * Get the string length, not the byte-length!
9540
     *
9541
     * INFO: use UTF8::strwidth() for the char-length
9542
     *
9543
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
9544
     *
9545
     * @see http://php.net/manual/en/function.mb-strlen.php
9546
     *
9547
     * @param string $str        <p>The string being checked for length.</p>
9548
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9549
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9550
     *
9551
     * @psalm-pure
9552
     *
9553
     * @return false|int
9554
     *                   <p>
9555
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
9556
     *                   $encoding.
9557
     *                   (One multi-byte character counted as +1).
9558
     *                   <br>
9559
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
9560
     *                   chars.
9561
     *                   </p>
9562
     */
9563 174
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        // 'UTF-8' and 'CP850' are already in normalized form
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" return a wrong length
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only: one byte is one character
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl ("grapheme_strlen()" can't handle other encodings)
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the matches of "any single character"
        //

        \preg_match_all('/./us', $str, $parts);
        $char_count = \count($parts[0]);

        // no match at all for a non-empty string means it could not be processed
        return $char_count === 0 ? false : $char_count;
    }
9668
9669
    /**
9670
     * Get string length in byte.
9671
     *
9672
     * @param string $str
9673
     *
9674
     * @psalm-pure
9675
     *
9676
     * @return int
9677
     */
9678 1
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // With "mbstring.func_overload" active, plain strlen() would be
        // redirected to mbstring, so ask mbstring directly for an 8-bit count.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
9691
9692
    /**
9693
     * Case-insensitive string comparisons using a "natural order" algorithm.
9694
     *
9695
     * INFO: natural order version of UTF8::strcasecmp()
9696
     *
9697
     * EXAMPLES: <code>
9698
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
9699
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9700
     *
9701
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9702
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9703
     * </code>
9704
     *
9705
     * @param string $str1     <p>The first string.</p>
9706
     * @param string $str2     <p>The second string.</p>
9707
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9708
     *
9709
     * @psalm-pure
9710
     *
9711
     * @return int
9712
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9713
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9714
     *             <strong>0</strong> if they are equal
9715
     */
9716 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands first, then delegate to the
        // case-sensitive natural-order comparison.
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_first, $folded_second);
    }
9723
9724
    /**
9725
     * String comparisons using a "natural order" algorithm
9726
     *
9727
     * INFO: natural order version of UTF8::strcmp()
9728
     *
9729
     * EXAMPLES: <code>
9730
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
9731
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9732
     *
9733
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9734
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9735
     * </code>
9736
     *
9737
     * @see http://php.net/manual/en/function.strnatcmp.php
9738
     *
9739
     * @param string $str1 <p>The first string.</p>
9740
     * @param string $str2 <p>The second string.</p>
9741
     *
9742
     * @psalm-pure
9743
     *
9744
     * @return int
9745
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9746
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
9747
     *             <strong>0</strong> if they are equal
9748
     */
9749 4
    public static function strnatcmp(string $str1, string $str2): int
    {
        // Only strings that differ need to be folded and compared.
        if ($str1 !== $str2) {
            $folded_first = (string) self::strtonatfold($str1);
            $folded_second = (string) self::strtonatfold($str2);

            return \strnatcmp($folded_first, $folded_second);
        }

        return 0;
    }
9760
9761
    /**
9762
     * Case-insensitive string comparison of the first n characters.
9763
     *
9764
     * EXAMPLE: <code>
9765
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
9766
     * </code>
9767
     *
9768
     * @see http://php.net/manual/en/function.strncasecmp.php
9769
     *
9770
     * @param string $str1     <p>The first string.</p>
9771
     * @param string $str2     <p>The second string.</p>
9772
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
9773
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9774
     *
9775
     * @psalm-pure
9776
     *
9777
     * @return int
9778
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9779
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9780
     *             <strong>0</strong> if they are equal
9781
     */
9782 2
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands first, then compare their first $len characters.
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded_first, $folded_second, $len);
    }
9794
9795
    /**
9796
     * String comparison of the first n characters.
9797
     *
9798
     * EXAMPLE: <code>
9799
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
9800
     * </code>
9801
     *
9802
     * @see http://php.net/manual/en/function.strncmp.php
9803
     *
9804
     * @param string $str1     <p>The first string.</p>
9805
     * @param string $str2     <p>The second string.</p>
9806
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
9807
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9808
     *
9809
     * @psalm-pure
9810
     *
9811
     * @return int
9812
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9813
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9814
     *             <strong>0</strong> if they are equal
9815
     */
9816 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // 'UTF-8' and 'CP850' are already in normalized form
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut both operands down to their first $len characters ...
        if ($encoding !== 'UTF-8') {
            $head_first = (string) self::substr($str1, 0, $len, $encoding);
            $head_second = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head_first = (string) \mb_substr($str1, 0, $len);
            $head_second = (string) \mb_substr($str2, 0, $len);
        }

        // ... and compare what is left.
        return self::strcmp($head_first, $head_second);
    }
9836
9837
    /**
9838
     * Search a string for any of a set of characters.
9839
     *
9840
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
9841
     *
9842
     * @see http://php.net/manual/en/function.strpbrk.php
9843
     *
9844
     * @param string $haystack  <p>The string where char_list is looked for.</p>
9845
     * @param string $char_list <p>This parameter is case-sensitive.</p>
9846
     *
9847
     * @psalm-pure
9848
     *
9849
     * @return false|string
9850
     *                      <p>The string starting from the character found, or false if it is not found.</p>
9851
     */
9852 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // Turn the character list into a regex character class and look for
        // the first character of the haystack that belongs to it.
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // Return the haystack from the first occurrence of the matched character on.
        $byte_pos = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $byte_pos);
    }
9864
9865
    /**
9866
     * Find the position of the first occurrence of a substring in a string.
9867
     *
9868
     * INFO: use UTF8::strpos_in_byte() for the byte-position
9869
     *
9870
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
9871
     *
9872
     * @see http://php.net/manual/en/function.mb-strpos.php
9873
     *
9874
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
9875
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
9876
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
9877
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9878
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9879
     *
9880
     * @psalm-pure
9881
     *
9882
     * @return false|int
9883
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
9884
     *                   string.<br> If needle is not found it returns false.
9885
     */
9886 52
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Before PHP 8 nothing can ever be found in an empty haystack,
        // so bail out before the needle is touched at all.
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // PHP 8 semantics: only the empty needle "matches" at position 0.
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // 'UTF-8' and 'CP850' are already in normalized form
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack. Translate it
            // into an absolute character position first, so that the result below
            // is relative to the start of the whole haystack (as "mb_strpos()"
            // reports it) and not to the start of the searched tail.
            // An offset that reaches in front of the string is clamped to 0.
            $offset = (int) self::strlen($haystack, $encoding) + $offset;
            if ($offset < 0) {
                $offset = 0;
            }
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position of the needle inside the searched tail
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
10038
10039
    /**
10040
     * Find the position of the first occurrence of a substring in a string.
10041
     *
10042
     * @param string $haystack <p>
10043
     *                         The string being checked.
10044
     *                         </p>
10045
     * @param string $needle   <p>
10046
     *                         The string to search for in haystack.
10047
     *                         </p>
10048
     * @param int    $offset   [optional] <p>
10049
     *                         The search offset. If it is not specified, 0 is used.
10050
     *                         </p>
10051
     *
10052
     * @psalm-pure
10053
     *
10054
     * @return false|int
10055
     *                   <p>The numeric position of the first occurrence of needle in the
10056
     *                   haystack string. If needle is not found, it returns false.</p>
10057
     */
10058 2
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // With "mbstring.func_overload" active, plain strpos() would be
        // redirected to mbstring, so ask mbstring directly for an 8-bit search.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
10071
10072
    /**
10073
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10074
     *
10075
     * @param string $haystack <p>
10076
     *                         The string being checked.
10077
     *                         </p>
10078
     * @param string $needle   <p>
10079
     *                         The string to search for in haystack.
10080
     *                         </p>
10081
     * @param int    $offset   [optional] <p>
10082
     *                         The search offset. If it is not specified, 0 is used.
10083
     *                         </p>
10084
     *
10085
     * @psalm-pure
10086
     *
10087
     * @return false|int
10088
     *                   <p>The numeric position of the first occurrence of needle in the
10089
     *                   haystack string. If needle is not found, it returns false.</p>
10090
     */
10091 2
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // With "mbstring.func_overload" active, plain stripos() would be
        // redirected to mbstring, so ask mbstring directly for an 8-bit search.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \stripos($haystack, $needle, $offset);
    }
10104
10105
    /**
10106
     * Find the last occurrence of a character in a string within another.
10107
     *
10108
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
10109
     *
10110
     * @see http://php.net/manual/en/function.mb-strrchr.php
10111
     *
10112
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10113
     * @param string $needle        <p>The string to find in haystack</p>
10114
     * @param bool   $before_needle [optional] <p>
10115
     *                              Determines which portion of haystack
10116
     *                              this function returns.
10117
     *                              If set to true, it returns all of haystack
10118
     *                              from the beginning to the last occurrence of needle.
10119
     *                              If set to false, it returns all of haystack
10120
     *                              from the last occurrence of needle to the end,
10121
     *                              </p>
10122
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10123
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10124
     *
10125
     * @psalm-pure
10126
     *
10127
     * @return false|string
10128
     *                      <p>The portion of haystack or false if needle is not found.</p>
10129
     */
10130 2
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // 'UTF-8' and 'CP850' are already in normalized form
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred way: mbstring (matches the WHOLE needle)
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrchr($haystack, $needle, $before_needle);
            }

            return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        // NOTE: every fallback below searches for the whole needle as well, so
        // that the result does not depend on which extension is installed.
        // (Native "strrchr()" would only look at the first byte of the needle.)

        //
        // fallback for binary || ascii only
        //

        if (
            !$before_needle
            &&
            (
                $encoding === 'CP850'
                ||
                $encoding === 'ASCII'
            )
        ) {
            $byte_pos = \strrpos($haystack, $needle);
            if ($byte_pos === false) {
                return false;
            }

            return \substr($haystack, $byte_pos);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv, otherwise via vanilla php:
        // locate the last occurrence (character position) of the needle
        //

        if (self::$SUPPORT['iconv'] === true) {
            $pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($pos === false) {
            return false;
        }

        if ($before_needle) {
            // everything in front of the last occurrence
            return self::substr($haystack, 0, $pos, $encoding);
        }

        // the last occurrence and everything after it
        return self::substr($haystack, $pos, null, $encoding);
    }
10235
10236
    /**
10237
     * Reverses characters order in the string.
10238
     *
10239
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
10240
     *
10241
     * @param string $str      <p>The input string.</p>
10242
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10243
     *
10244
     * @psalm-pure
10245
     *
10246
     * @return string
10247
     *                <p>The string with characters in the reverse sequence.</p>
10248
     */
10249 10
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Encode emoji before reversing; they are decoded again on return.
        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // walk the string back to front, one character at a time
            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
10293
10294
    /**
10295
     * Find the last occurrence of a character in a string within another, case-insensitive.
10296
     *
10297
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
10298
     *
10299
     * @see http://php.net/manual/en/function.mb-strrichr.php
10300
     *
10301
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10302
     * @param string $needle        <p>The string to find in haystack.</p>
10303
     * @param bool   $before_needle [optional] <p>
10304
     *                              Determines which portion of haystack
10305
     *                              this function returns.
10306
     *                              If set to true, it returns all of haystack
10307
     *                              from the beginning to the last occurrence of needle.
10308
     *                              If set to false, it returns all of haystack
10309
     *                              from the last occurrence of needle to the end,
10310
     *                              </p>
10311
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10312
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10313
     *
10314
     * @psalm-pure
10315
     *
10316
     * @return false|string
10317
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
10318
     */
10319 3
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // 'UTF-8' and 'CP850' are already in normalized form
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred way: mbstring (matches the WHOLE needle)
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrichr($haystack, $needle, $before_needle);
            }

            return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //
        // The whole needle is searched for (not only its first character), so
        // that the result is the same as on the mbstring path above.
        //

        $pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        if ($before_needle) {
            // everything in front of the last occurrence
            return self::substr($haystack, 0, $pos, $encoding);
        }

        // the last occurrence and everything after it
        return self::substr($haystack, $pos, null, $encoding);
    }
10374
10375
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * Strategy, in order: mbstring, byte-wise "strripos()" for CP850 / ASCII,
     * intl ("grapheme_strripos()", UTF-8 with non-negative offset only),
     * byte-wise "strripos()" for pure ASCII input, and finally case-folding
     * both strings and delegating to UTF8::strrpos().
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for. A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // empty haystack, checked against the raw (not yet normalized) needle
        if ($haystack === '') {
            if (!$is_php8) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // empty haystack, checked again with the normalized needle
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = self::$SUPPORT['intl'] === true
                       &&
                       $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
                       &&
                       $offset >= 0; // grapheme_strripos() can't handle negative offset
        if ($intl_usable) {
            $intl_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($intl_pos !== false) {
                return $intl_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both sides, then do a case-sensitive search
        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $clean_utf8
        );
    }
10506
10507
    /**
     * Byte-wise, case-insensitive search for the position of the last occurrence of a string within another.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found
     *                   (also false when haystack or needle is an empty string).</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strripos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
10541
10542
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * Strategy, in order: mbstring, byte-wise "strrpos()" for CP850 / ASCII,
     * intl ("grapheme_strrpos()", UTF-8 with non-negative offset only),
     * byte-wise "strrpos()" for pure ASCII input, and finally a byte search
     * whose result is converted into a character position via UTF8::strlen().
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // empty haystack, checked against the raw (not yet normalized) needle
        if ($haystack === '') {
            if (!$is_php8) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // empty haystack, checked again with the normalized needle
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strrpos($haystack, $needle, $offset);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = self::$SUPPORT['intl'] === true
                       &&
                       $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
                       &&
                       $offset >= 0; // grapheme_strrpos() can't handle negative offset
        if ($intl_usable) {
            $intl_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($intl_pos !== false) {
                return $intl_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack according to $offset first:
        // - positive offset: drop the leading characters (added back to the result below)
        // - negative offset: drop the trailing characters, result needs no correction
        $haystack_part = null;
        if ($offset < 0) {
            $haystack_part = self::substr($haystack, 0, $offset);
            $offset = 0;
        } elseif ($offset > 0) {
            $haystack_part = self::substr($haystack, $offset);
        }

        if ($haystack_part !== null) {
            $haystack = $haystack_part === false ? '' : (string) $haystack_part;
        }

        // byte position of the last match ...
        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        // ... converted into a character position by measuring the text in front of it
        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
10701
10702
    /**
     * Byte-wise search for the position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false
     *                   (also false when haystack or needle is an empty string).</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strrpos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
10736
10737
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start of the examined part of the input string.</p>
     * @param int|null $length   [optional] <p>Length of the examined part of the input string.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Length of the leading segment; 0 if the (sliced) string or the mask is empty
     *                   or nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested slice before matching
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest prefix that is built only from mask characters
        $matches = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
            return 0;
        }

        return (int) self::strlen($matches[0], $encoding);
    }
10784
10785
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true); // 'iñtër'
     * </code>
     *
     * Strategy, in order: mbstring, byte-wise "strstr()" for CP850 / ASCII,
     * intl ("grapheme_strstr()", UTF-8 only), byte-wise "strstr()" for pure
     * ASCII input, and finally a regex based search.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // an empty needle "matches" at the very beginning since PHP 8
        if ($needle === '') {
            return $is_php8 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            self::$SUPPORT['intl'] === true
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        ) {
            $intl_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($intl_result !== false) {
                return $intl_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        $match = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $before = $match[1];

        return $before_needle
            ? $before
            : self::substr($haystack, (int) self::strlen($before));
    }
10918
10919
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found
     *                      (also false when haystack or needle is an empty string).</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850'); // 8-BIT
    }
10960
10961
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid UTF-8 sequences before any case mapping is applied
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // language specific or non UTF-8 input: delegate to the full featured helpers
        if ($lang !== null || $encoding !== 'UTF-8') {
            return $lower
                ? self::strtolower($str, $encoding, false, $lang)
                : self::strtoupper($str, $encoding, false, $lang);
        }

        // plain UTF-8 without language rules: use mbstring directly
        return $lower
            ? \mb_strtolower($str)
            : \mb_strtoupper($str);
    }
11017
11018
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid UTF-8 sequences before the case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // common case: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // language specific lowercasing via the intl transliterator
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11097
11098
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: no language-specific rules and no encoding normalization needed
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    /**
                     * @psalm-suppress ImpureFunctionCall - is is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: fall back to the generic transliterator
                    $language_code = 'Any-Upper';
                }

                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11177
11178
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into a translation table
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // split plain strings into their characters, so both sides are lists
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            $count_from = \count($from);
            $count_to = \count($to);

            // truncate the longer list, so both lists have the same number of elements
            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep "$from" untouched, so that the error message below can still show the input
            $translation_table = \array_combine($from, $to);
            if ($translation_table === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            return \strtr($str, $translation_table);
        }

        // "$to" is an empty string here
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        // "$from" is used as the translation table: [search => replacement]
        return \strtr($str, $from);
    }
11244
11245
    /**
     * Return the width of a string.
     *
     * INFO: in the fallback without "mbstring", every character of the wide ranges (e.g. CJK)
     *       counts as two columns and every other character as one
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        // preferred way: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        // vanilla php fallback: works on UTF-8 only, so convert first
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove all wide characters and remember how many were removed
        $wide_chars_count = 0;
        $narrow_only = (string) \preg_replace(
            '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u',
            '',
            $str,
            -1,
            $wide_chars_count
        );

        // wide characters take two columns, the rest takes one column each
        return (2 * $wide_chars_count) + (int) self::strlen($narrow_only);
    }
11304
11305
    /**
     * Get part of a string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p>
     *                      <p><b>FALSE</b> is returned if the string length is needed but cannot be
     *                      determined; the native functions used here may presumably also return
     *                      <b>FALSE</b> on some PHP versions.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string
        //
        // INFO: "$length" is never 0 at this point (handled above), so the strict
        //       null-check is equivalent to the former loose "!$length" check
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        $length = $length ?? $str_length;

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        // NOTE(review): "iconv_substr()" is called without "$encoding", so it presumably
        //               uses the default iconv encoding - confirm that this is intended
        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        // &&
        // extract relevant part, and join to make string again
        return \implode('', \array_slice(self::str_split($str), $offset, $length));
    }
11463
11464
    /**
     * Compare a part of the first string (from an offset, up to a length of characters)
     * with the second string.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. Without a length, the main
     *                                     string is compared from the offset up to its end.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // the strings are only cut if an offset or a length was requested
        $needs_slicing = $offset !== 0 || $length !== null;

        if ($needs_slicing && $encoding === 'UTF-8') {
            $str1 = $length === null
                ? (string) \mb_substr($str1, $offset)
                : (string) \mb_substr($str1, $offset, $length);

            // the second string is limited to the length of the extracted part
            $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
        } elseif ($needs_slicing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str1 = (string) self::substr($str1, $offset, $length, $encoding);

            // the second string is limited to the length of the extracted part
            $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1), $encoding);
        }

        return $case_insensitivity
            ? self::strcasecmp($str1, $str2, $encoding)
            : self::strcmp($str1, $str2);
    }
11525
11526
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or false if the needle is empty or
     *                   if the length of the haystack cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // nothing to search in || nothing to search through
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // invalid byte sequences would distort the character based offset / length
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested part
        if ($offset || $length > 0) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
11623
11624
    /**
     * Count the number of substring occurrences, processed in bytes.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string (0 if one of the strings is empty).</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // with "mbstring.func_overload" the haystack is cut manually, before counting via "mb_substr_count()"
        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                \PHP_VERSION_ID < 70100 // output from "substr_count()" have changed in PHP 7.1 (PHP_VERSION_ID of 7.1.0 is 70100)
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
11705
11706
    /**
     * Count how often $substring occurs in the given string.
     *
     * The comparison is case-sensitive by default; pass false as
     * $case_sensitive to ignore the case.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // the fast path is chosen by the encoding as it was passed in
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // bring both strings to a common case, if the case should be ignored
        if (!$case_sensitive) {
            if ($is_utf8) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            } else {
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }
        }

        if ($is_utf8) {
            return (int) \mb_substr_count($str, $substring);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
11753
11754
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), ignoring the case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Return the sub-string.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip from || nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many characters as the prefix is long
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
11786
11787
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, as returned by the native function.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to extract
        if ($length === 0 || $str === '') {
            return '';
        }

        // no offset and no limit: the whole string is requested
        if ($length === null && !$offset) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // without a length, a large number is used to read up to the end of the string
            $byte_length = $length ?? 2147483647;

            return \substr($str, $offset, $byte_length);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }
11820
11821
    /**
     * Strip a suffix ($needle) from the end of the string ($haystack), ignoring the case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Return the sub-string.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from || nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $kept_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $kept_length);
    }
11853
11854
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), case-sensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Return the sub-string.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from || nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many characters as the prefix is long
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
11886
11887
    /**
11888
     * Replace text within a portion of a string.
11889
     *
11890
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
11891
     *
11892
     * source: https://gist.github.com/stemar/8287074
11893
     *
11894
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
11895
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
11896
     * @param int|int[]       $offset      <p>
11897
     *                                     If start is positive, the replacing will begin at the start'th offset
11898
     *                                     into string.
11899
     *                                     <br><br>
11900
     *                                     If start is negative, the replacing will begin at the start'th character
11901
     *                                     from the end of string.
11902
     *                                     </p>
11903
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
11904
     *                                     portion of string which is to be replaced. If it is negative, it
11905
     *                                     represents the number of characters from the end of string at which to
11906
     *                                     stop replacing. If it is not given, then it will default to strlen(
11907
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
11908
     *                                     length is zero then this function will have the effect of inserting
11909
     *                                     replacement into string at the given start offset.</p>
11910
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
11911
     *
11912
     * @psalm-pure
11913
     *
11914
     * @return string|string[]
11915
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
11916
     *
11917
     * @template TSubstrReplace
11918
     * @phpstan-param TSubstrReplace $str
11919
     * @phpstan-return TSubstrReplace
11920
     */
11921 10
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        // ---------------------------------------------------------------
        // Array mode: normalise every argument to one entry per input
        // string, then process each string on its own.
        // ---------------------------------------------------------------
        if (\is_array($str)) {
            $num = \count($str);

            // "array_pad()" never shrinks an array, so without this guard the
            // one-element argument lists built below would make "array_map()"
            // run once and return a phantom element for an empty input.
            if ($num === 0) {
                return [];
            }

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset (non-integer entries fall back to 0)
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length (an omitted length means "insert", i.e. length 0, in array mode;
            // non-integer entries fall back to $num, as in the original implementation)
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call per string; the closure forwards $encoding, which a plain
            // callable passed to "array_map()" would silently drop
            return \array_map(
                static function ($single_str, $single_replacement, $single_offset, $single_length) use ($encoding) {
                    return self::substr_replace(
                        $single_str,
                        $single_replacement,
                        $single_offset,
                        $single_length,
                        $encoding
                    );
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // ---------------------------------------------------------------
        // Single-string mode
        // ---------------------------------------------------------------

        if (\is_array($replacement)) {
            // Use the first element whatever its key is (like the native function);
            // indexing with [0] would fail for arrays that do not start at key 0.
            $replacement = $replacement !== [] ? \reset($replacement) : '';
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end of the string, a missing one means "to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php: split both strings into characters and splice the arrays
        //

        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12045
12046
    /**
12047
     * Removes a suffix ($needle) from the end of the string ($haystack).
12048
     *
12049
     * EXAMPLE: <code>
12050
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
12051
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
12052
     * </code>
12053
     *
12054
     * @param string $haystack <p>The string to search in.</p>
12055
     * @param string $needle   <p>The substring to search for.</p>
12056
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
12057
     *
12058
     * @psalm-pure
12059
     *
12060
     * @return string
12061
     *                <p>Return the sub-string.</p>
12062
     */
12063 2
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // An empty haystack has nothing to strip; an empty needle strips nothing.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // Case-sensitive, byte-wise suffix comparison.
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        // UTF-8 is served by the mb_* functions directly ...
        if ($encoding === 'UTF-8') {
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        // ... every other encoding goes through the encoding-aware wrappers.
        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
12095
12096
    /**
12097
     * Returns a case swapped version of the string.
12098
     *
12099
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
12100
     *
12101
     * @param string $str        <p>The input string.</p>
12102
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12103
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12104
     *
12105
     * @psalm-pure
12106
     *
12107
     * @return string
12108
     *                <p>Each character's case swapped.</p>
12109
     */
12110 6
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences first, so the case functions see clean input
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // Fast path: "lower ^ upper ^ original" flips the case byte by byte. This is
        // only meaningful while all three strings line up, so it is used when their
        // byte lengths agree (the cheap signal that no character changed its width).
        $byte_length = \strlen($str);
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            return (string) ($lower ^ $upper ^ $str);
        }

        // Slow path: at least one character has case variants of different byte
        // length (e.g. "ı" -> "I"), which would misalign and truncate the XOR
        // result. Swap the case character by character instead.
        $swapped = '';

        if ($is_utf8) {
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            if ($chars === false) {
                // invalid UTF-8 can't be split into characters: keep the legacy result
                return (string) ($lower ^ $upper ^ $str);
            }

            foreach ($chars as $char) {
                $char_lower = \mb_strtolower($char);
                // a char that already is lowercase (or has no case) gets its uppercase form
                $swapped .= $char_lower === $char ? \mb_strtoupper($char) : $char_lower;
            }

            return $swapped;
        }

        $char_count = (int) self::strlen($str, $encoding);
        for ($i = 0; $i < $char_count; ++$i) {
            $char = (string) self::substr($str, $i, 1, $encoding);
            $char_lower = self::strtolower($char, $encoding);
            $swapped .= $char_lower === $char ? self::strtoupper($char, $encoding) : $char_lower;
        }

        return $swapped;
    }
12128
12129
    /**
12130
     * Checks whether symfony-polyfills are used.
12131
     *
12132
     * @psalm-pure
12133
     *
12134
     * @return bool
12135
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
12136
     *
12137
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
12138
     */
12139
    public static function symfony_polyfill_used(): bool
    {
        // A polyfill is in play when a function exists although the extension
        // that would normally provide it is not loaded.
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12156
12157
    /**
12158
     * @param string $str
12159
     * @param int    $tab_length
12160
     *
12161
     * @psalm-pure
12162
     *
12163
     * @return string
12164
     */
12165 6
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // One tab becomes $tab_length spaces; "str_repeat()" yields exactly the
        // literals that used to be hard-coded for the common widths 2 and 4.
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12177
12178
    /**
12179
     * Converts the first character of each word in the string to uppercase
12180
     * and all other chars to lowercase.
12181
     *
12182
     * @param string      $str                           <p>The input string.</p>
12183
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
12184
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
12185
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
12186
     *                                                   tr</p>
12187
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
12188
     *                                                   -> ß</p>
12189
     *
12190
     * @psalm-pure
12191
     *
12192
     * @return string
12193
     *                <p>A string with all characters of $str being title-cased.</p>
12194
     */
12195 5
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // strip invalid byte sequences before any case conversion happens
            $str = self::clean($str);
        }

        // Language-specific rules or length-preserving conversion are handled
        // by the more capable "str_titleize()".
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        // Plain case: let mbstring do the work.
        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
12232
12233
    /**
12234
     * Convert a string into ASCII.
12235
     *
12236
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
12237
     *
12238
     * @param string $str     <p>The input string.</p>
12239
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
12240
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
12241
     *                        performance</p>
12242
     *
12243
     * @psalm-pure
12244
     *
12245
     * @return string
12246
     */
12247 37
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        // Pure delegation: the transliteration itself is done by the ASCII helper class.
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
12254
12255
    /**
12256
     * @param bool|float|int|string $str
12257
     *
12258
     * @psalm-pure
12259
     *
12260
     * @return bool
12261
     */
12262 25
    public static function to_boolean($str): bool
    {
        // Normalise once: cast, strip surrounding whitespace and lowercase, so that
        // inputs like ' false ' or "OFF\n" hit the keyword map instead of falling
        // through to the generic "non-empty string is true" rule.
        $key = \strtolower(\trim((string) $str));

        if ($key === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        if (isset($map[$key])) {
            return $map[$key];
        }

        // any other number is true only when it is greater than zero
        if (\is_numeric($key)) {
            return ((float) $key) > 0;
        }

        // a remaining non-empty, non-keyword, non-numeric string counts as true
        return true;
    }
12298
12299
    /**
12300
     * Convert given string to safe filename (and keep string case).
12301
     *
12302
     * @param string $str
12303
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
12304
     *                                  simply replaced with hyphen.
12305
     * @param string $fallback_char
12306
     *
12307
     * @psalm-pure
12308
     *
12309
     * @return string
12310
     */
12311 1
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        // Pure delegation: the filename sanitising is done by the ASCII helper class.
        return ASCII::to_filename($str, $use_transliterate, $fallback_char);
    }
12322
12323
    /**
12324
     * Convert a string into "ISO-8859"-encoding (Latin-1).
12325
     *
12326
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
12327
     *
12328
     * @param string|string[] $str
12329
     *
12330
     * @psalm-pure
12331
     *
12332
     * @return string|string[]
12333
     *
12334
     * @template TToIso8859
12335
     * @phpstan-param TToIso8859 $str
12336
     * @phpstan-return TToIso8859
12337
     */
12338 8
    public static function to_iso8859($str)
    {
        // Arrays are converted element by element (recursively), keys are kept.
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        // nothing to decode in an empty string
        return $str === '' ? '' : self::utf8_decode($str);
    }
12355
12356
    /**
12357
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
12358
     *
12359
     * <ul>
12360
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
12361
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
12362
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
12363
     * case.</li>
12364
     * </ul>
12365
     *
12366
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
12367
     *
12368
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
12369
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
12370
     *
12371
     * @psalm-pure
12372
     *
12373
     * @return string|string[]
12374
     *                         <p>The UTF-8 encoded string</p>
12375
     *
12376
     * @template TToUtf8
12377
     * @phpstan-param TToUtf8 $str
12378
     * @phpstan-return TToUtf8
12379
     */
12380 42
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        // A single value is converted directly ...
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $converted */
            $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $converted;
        }

        // ... an array is converted entry by entry, keys are kept.
        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        /** @phpstan-var TToUtf8 $str */
        return $str;
    }
12396
12397
    /**
12398
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
12399
     *
12400
     * <ul>
12401
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
12402
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
12403
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
12404
     * case.</li>
12405
     * </ul>
12406
     *
12407
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
12408
     *
12409
     * @param string $str                        <p>Any string.</p>
12410
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
12411
     *
12412
     * @psalm-pure
12413
     *
12414
     * @return string
12415
     *                <p>The UTF-8 encoded string</p>
12416
     */
12417 42
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $result = '';

        // Pass 1: walk the raw bytes. Sequences that already look like UTF-8 are
        // copied as they are, every other non-ASCII byte is converted on its own.
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $byte = $str[$pos];
            $ord = \ord($byte);

            // 7-bit ASCII: it doesn't need conversion
            if ($ord < 0x80) {
                $result .= $byte;

                continue;
            }

            // Number of continuation bytes announced by the lead byte;
            // 0 means "not a usable lead byte" (stray 0x80-0xBF or 0xF8-0xFF).
            if ($ord >= 0xC0 && $ord <= 0xDF) {
                $tail = 1; // looks like 2 bytes UTF8
            } elseif ($ord >= 0xE0 && $ord <= 0xEF) {
                $tail = 2; // looks like 3 bytes UTF8
            } elseif ($ord >= 0xF0 && $ord <= 0xF7) {
                $tail = 3; // looks like 4 bytes UTF8
            } else {
                $tail = 0;
            }

            // The sequence is accepted only if all announced bytes exist
            // and each of them has the form 10xxxxxx.
            $looks_like_utf8 = $tail > 0 && ($pos + $tail) < $byte_count;
            for ($k = 1; $looks_like_utf8 && $k <= $tail; ++$k) {
                $looks_like_utf8 = (\ord($str[$pos + $k]) & 0xC0) === 0x80;
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already
                $result .= \substr($str, $pos, $tail + 1);
                $pos += $tail;
            } else {
                // not valid UTF8 - convert this single byte
                $result .= self::to_utf8_convert_helper($byte);
            }
        }

        // Pass 2: decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                    $high = (int) \hexdec($matches[1]);
                    $low = (int) \hexdec($matches[2]);
                    $code_point = ($high << 10) + $low + 0x10000 - (0xD800 << 10) - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    $first = (string) self::chr(0xC0 | ($code_point >> 6));
                    $second = (string) self::chr(0x80 | ($code_point & 0x3F));

                    return $first . $second;
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" signals an error with null
        if ($result === null) {
            return '';
        }

        // Pass 3 (optional): decode html entities
        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
12528
12529
    /**
12530
     * Returns the given string as an integer, or null if the string isn't numeric.
12531
     *
12532
     * @param string $str
12533
     *
12534
     * @psalm-pure
12535
     *
12536
     * @return int|null
12537
     *                  <p>null if the string isn't numeric</p>
12538
     */
12539 1
    public static function to_int(string $str)
    {
        // Numeric strings are cast to int, everything else is reported as null.
        return \is_numeric($str) ? (int) $str : null;
    }
12547
12548
    /**
12549
     * Returns the given input as string, or null if the input isn't int|float|string
12550
     * and do not implement the "__toString()" method.
12551
     *
12552
     * @param float|int|object|string|null $input
12553
     *
12554
     * @psalm-pure
12555
     *
12556
     * @return string|null
12557
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
12558
     */
12559 1
    public static function to_string($input)
    {
        // Scalars that have an unambiguous string form.
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // Objects are accepted only when they know how to render themselves.
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // null, bool, array, resource, objects without "__toString()" ...
        return null;
    }
12587
12588
    /**
12589
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
12590
     *
12591
     * INFO: This is slower than "trim()"
12592
     *
12593
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
12594
     * but the check for ASCII (7-Bit) costs more time than we can save here.
12595
     *
12596
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
12597
     *
12598
     * @param string      $str   <p>The string to be trimmed</p>
12599
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
12600
     *
12601
     * @psalm-pure
12602
     *
12603
     * @return string
12604
     *                <p>The trimmed string.</p>
12605
     */
12606 57
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // "mb_ereg_replace()" takes the pattern without delimiters, hence no
                // delimiter argument for "preg_quote()" here
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$chars}" instead of "${chars}": the latter interpolation syntax
                // is deprecated as of PHP 8.2
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: same pattern, applied via the regex helper
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
12633
12634
    /**
12635
     * Makes string's first char uppercase.
12636
     *
12637
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
12638
     *
12639
     * @param string      $str                           <p>The input string.</p>
12640
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
12641
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
12642
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
12643
     *                                                   tr</p>
12644
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
12645
     *                                                   -> ß</p>
12646
     *
12647
     * @psalm-pure
12648
     *
12649
     * @return string
12650
     *                <p>The resulting string with its first char uppercased.</p>
12651
     */
12652 69
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before the string is split into characters
            $str = self::clean($str);
        }

        // Without language rules / length preservation the plain mb_* functions suffice.
        $plain_mb_is_enough = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $rest = (string) \mb_substr($str, 1);
            $first_char = (string) \mb_substr($str, 0, 1);

            if ($plain_mb_is_enough) {
                return \mb_strtoupper($first_char) . $rest;
            }

            $first_upper = self::strtoupper(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $first_upper . $rest;
        }

        // any other encoding: normalise its name first, then split with it
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($plain_mb_is_enough) {
            $first_char = (string) \mb_substr($str, 0, 1, $encoding);
            $first_upper = \mb_strtoupper($first_char, $encoding);
        } else {
            $first_char = (string) self::substr($str, 0, 1, $encoding);
            $first_upper = self::strtoupper(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $first_upper . $rest;
    }
12710
12711
    /**
     * Uppercase the first character of every word in the string.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that are copied to the result unchanged.</p>
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison on purpose: the string "0" is falsy in PHP,
        // but it is valid input and must not be turned into ''
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            // remove non UTF-8 chars before the string is split into words
            $str = self::clean($str);
        }

        // the native function is only usable if neither extra word chars
        // nor (non-empty) exceptions were given
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty parts only; a "0" word must be kept
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
12779
12780
    /**
     * Decode URL-encoded input together with HTML entities and repair
     * urlencoded win1252 chars, optionally repeating until nothing changes.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode [optional] <p>Decode as often as possible, i.e. until a decoding pass no
     *                             longer changes the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // one decoding pass is always done; further passes only run in
        // "multi decode" mode and stop as soon as a pass changes nothing
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }
12839
12840
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences whose code point is below 256 are mapped to the
     * matching single byte; every other multi-byte sequence becomes "?".
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars [optional] <p>If true, the original string is returned unchanged whenever
     *                                the decoded result is not shorter than the input (both measured with
     *                                self::strlen()).</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // kept for the length comparison at the end
        $original = $str;
        $byte_count = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $replacement = '?';

        // the string is rewritten in place: $read walks the input bytes,
        // $write is the position of the next output byte ($write <= $read)
        $read = 0;
        $write = 0;
        while ($read < $byte_count) {
            switch ($str[$read] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // lead byte of a 2-byte sequence
                    $code = (self::$ORD[$str[$read] & "\x1F"] << 6) | self::$ORD[$str[++$read] & "\x3F"];
                    $str[$write] = $code < 256 ? self::$CHR[$code] : $replacement;

                    break;

                case "\xF0":
                    // lead byte of a 4-byte sequence: one byte more to skip than for "\xE0"
                    ++$read;

                // no break

                case "\xE0":
                    // lead byte of a 3-byte sequence
                    $str[$write] = $replacement;
                    $read += 2;

                    break;

                default:
                    $str[$write] = $str[$read];
            }

            ++$read;
            ++$write;
        }

        /** @var false|string $decoded - needed for PhpStan (stubs error) */
        $decoded = \substr($str, 0, $write);
        if ($decoded === false) {
            $decoded = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($decoded) >= (int) self::strlen($original)
        ) {
            return $original;
        }

        return $decoded;
    }
12912
12913
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the conversion failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // "\utf8_encode()" is deprecated as of PHP 8.2; converting from
        // ISO-8859-1 via "mb_convert_encoding()" produces the same result
        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

        if ($encoded === false) {
            return '';
        }

        return $encoded;
    }
12939
12940
    /**
     * Returns the whitespace table stored in self::$WHITESPACE_TABLE.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  The whitespace characters as values; the keys are expected to name the type of
     *                  whitespace as defined in the URL above (the table itself is declared elsewhere
     *                  in this class)
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12955
12956
    /**
     * Limit the number of words in a string.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The maximum number of words; a value below 1 results in an empty string.</p>
     * @param string $str_add_on <p>Text that is appended if the string was shortened.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input itself if it does not exceed the limit, otherwise the first words,
     *                right-trimmed, followed by $str_add_on.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace, then up to $limit runs of "non-whitespace + whitespace"
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        // nothing matched: hand the input back untouched
        if (!isset($matches[0])) {
            return $str;
        }

        $head = $matches[0];

        // the match covers the whole string, so nothing has to be cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
12990
12991
    /**
     * Wraps a string to a given number of characters.
     *
     * The string is turned into a single-byte "mask" (one placeholder per
     * character), the mask is wrapped with the native wordwrap(), and the
     * resulting break positions are then applied to the real characters.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string if $str or
     *                $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        // $chars: the real characters (an existing $break counts as one entry)
        // $mask:  "#" for an existing break, " " for a space, "?" for anything else
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach (\explode($break, $str) as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $result = '';
        $char_pos = 0;
        $mask_pos = -1;
        $marker_pos = -1;
        /** @noinspection PhpAssignmentInConditionInspection - is ok here */
        while (($marker_pos = \mb_strpos($mask, '#', $marker_pos + 1)) !== false) {
            // copy every character in front of the current break marker
            for (++$mask_pos; $mask_pos < $marker_pos; ++$mask_pos) {
                if (isset($chars[$char_pos])) {
                    $result .= $chars[$char_pos];
                    unset($chars[$char_pos]);
                }
                ++$char_pos;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_pos > $mask_length) {
                    break 2;
                }
            }

            // a marker that replaced a space or an existing break consumes that
            // character; a marker inserted by "$cut" consumes nothing
            if (
                $break === $chars[$char_pos]
                ||
                $chars[$char_pos] === ' '
            ) {
                unset($chars[$char_pos++]);
            }

            $result .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($marker_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed above forms the last line
        return $result . \implode('', $chars);
    }
13083
13084
    /**
     * Split the string by "$delimiter" (newlines by default), word-wrap every
     * part on its own and join the parts again.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     The string to split by and to join with. With null the input is split
     *                                     at "\r\n", "\r" or "\n" and joined with "\n".
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter === null ? "\n" : $delimiter;
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $suffix;
    }
13137
13138
    /**
     * Returns the Unicode White Space characters stored in self::$WHITESPACE.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The list is declared elsewhere in this class; it is expected to use the numeric
     *                  code point as key and the White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
13150
13151
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>true for an empty string and for valid UTF-8, false otherwise.</p>
     */
    private static function is_utf8_string(string $str, bool $strict = false): bool
    {
        if ($str === '') {
            return true;
        }

        if ($strict) {
            // UTF-16 / UTF-32 is only considered if the string looks binary
            $is_binary = self::is_binary($str, true);

            if ($is_binary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($is_binary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        // fallback without PCRE UTF-8 support: a small state machine over the octets
        $remaining = 0; // expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $code_point = 0; // Unicode character collected so far
        $sequence_length = 1; // expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            $octet = self::$ORD[$str[$i]];

            if ($remaining === 0) {
                // No sequence is open, so we expect either a US-ASCII character
                // or the first octet of a multi-octet sequence.
                if ((0x80 & $octet) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;
                } elseif ((0xE0 & $octet) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $code_point = ($octet & 0x1F) << 6;
                    $remaining = 1;
                    $sequence_length = 2;
                } elseif ((0xF0 & $octet) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $code_point = ($octet & 0x0F) << 12;
                    $remaining = 2;
                    $sequence_length = 3;
                } elseif ((0xF8 & $octet) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $code_point = ($octet & 0x07) << 18;
                    $remaining = 3;
                    $sequence_length = 4;
                } elseif ((0xFC & $octet) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $code_point = ($octet & 0x03) << 24;
                    $remaining = 4;
                    $sequence_length = 5;
                } elseif ((0xFE & $octet) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $code_point = ($octet & 1) << 30;
                    $remaining = 5;
                    $sequence_length = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $octet) === 0x80) {
                // Legal continuation of the open multi-octet sequence.
                $code_point |= ($octet & 0x0000003F) << (($remaining - 1) * 6);

                // End of the multi-octet sequence: $code_point now contains the
                // final Unicode code point.
                if (--$remaining === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($sequence_length === 2 && $code_point < 0x0080)
                        ||
                        ($sequence_length === 3 && $code_point < 0x0800)
                        ||
                        ($sequence_length === 4 && $code_point < 0x10000)
                        ||
                        ($sequence_length > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($code_point & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($code_point > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // reset the state for the next character
                    $remaining = 0;
                    $code_point = 0;
                    $sequence_length = 1;
                }
            } else {
                // A continuation octet was expected, but something else was found:
                // incomplete multi-octet sequence.
                return false;
            }
        }

        // a sequence that is still open at the end of the string is invalid
        return $remaining === 0;
    }
13303
13304
    /**
     * Replace the characters of the case-folding tables with their
     * counterpart in the requested case.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases: the "caseFolding_full" data is
     *                                   applied in addition to the common table.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ): string {
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        // common cases: swap the search and replace table depending on the direction
        $str = $use_lowercase
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * lazily loaded, kept for all further calls
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is replaced by index 1 when lower-casing, and vice versa
        if ($use_lowercase) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
13355
13356
    /**
     * Load a data file from "/data/*.php" next to this class and return
     * whatever that file returns.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
13372
13373
    /**
     * Build the emoji lookup caches once.
     *
     * The emoji data is sorted by key length (longest key first); its keys
     * and values are cached, and a reversible placeholder is created for
     * every key.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        // already initialized: nothing to do
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                // longer keys first
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
13408
13409
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>false if the "MB_OVERLOAD_STRING" constant is not defined or if its bit is not set
     *              in the "mbstring.func_overload" INI value.</p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection DeprecatedIniOptionsInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
13429
13430
    /**
     * Filter a list of strings and return the remaining values re-indexed.
     *
     * @param string[] $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop values that are empty after trim().</p>
     * @param int|null $remove_short_values [optional] <p>Drop values whose length (mb_strlen) is less than
     *                                      or equal to this number; null disables the check.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ): array {
        // init
        $return = [];

        // iterate by value: nothing is modified in place, so no reference is needed
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
13469
13470
    /**
     * Build a regular-expression fragment that matches any character of "$s".
     *
     * The result is a character class ("[...]"), or a non-capturing
     * alternation ("(?:...|...)") if an entry cannot be part of the
     * character class. Results are cached per "$s" / "$class" combination.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class [optional] <p>Initial content of the character class.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 collects the character class, further entries are alternatives
        $class_array = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen goes to the front of the class, where it is taken literally
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // less than three bytes: quote it for the use inside "/.../"
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // a single character of three or more bytes: add it unquoted
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
13525
13526
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter and upper-cases the first character of every part
     * via self::ucfirst(), except for parts that
     * - are exactly one of the listed name particles (e.g. "van", "de"), or
     * - start with one of the listed prefixes (e.g. "mc", "d'"), or
     * - start with one of the listed name particles, if the delimiter is "-".
     * The parts are joined with $delimiter again afterwards.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts; an empty delimiter yields "".</p>
     * @param string $encoding  <p>Passed through to self::ucfirst().</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() rejects an empty delimiter (false + warning before PHP 8, ValueError since),
        // so keep the historic "empty string" result without ever calling it
        if ($delimiter === '') {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // beginnings that keep a part untouched; the particles only count for the "-" delimiter
        $skipped_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $skipped_beginnings = \array_merge($special_cases['names'], $skipped_beginnings);
        }

        $name_helper_array = \explode($delimiter, $names);

        foreach ($name_helper_array as &$name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            foreach ($skipped_beginnings as $beginning) {
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    // leave this part as it is and go on with the next one
                    continue 2;
                }
            }

            $name = self::ucfirst($name, $encoding);
        }
        // break the reference, so that $name can not alter the last element by accident
        unset($name);

        return \implode($delimiter, $name_helper_array);
    }
13624
13625
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is converted to Unicode normalization form D and every run of
     * non-spacing marks (\p{Mn}) is removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>An empty string if the normalization returned false, otherwise
     *                     the result of preg_replace().</p>
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() signals a failure with false
        return $decomposed === false
            ? ''
            : \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
13647
13648
    /**
     * Convert a single input value into its UTF-8 representation.
     *
     * The value is looked up in the "ord" table first. If the resulting code is
     * part of the Windows-1252 special cases, the mapped string is returned,
     * otherwise a two-byte sequence is built from the code and the input.
     *
     * NOTE(review): presumably $input is one byte / character that exists as a key
     * of the "ord" table - confirm against the callers.
     *
     * @param int|string $input
     *
     * @psalm-pure
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input)
    {
        // lazy-load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            return (string) self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Truncate explicitly: a fractional float used as array key is converted
        // implicitly, which is deprecated since PHP 8.1.
        /** @noinspection OffsetOperationsInspection */
        $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
        // bitwise operators on two strings work byte by byte: keep the low six bits, set the high bit
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
13684
13685
    /**
     * Rewrite "%uXXXX" escape sequences (3 or 4 hex digits) as HTML hex entities ("&#xXXXX;").
     *
     * Only the notation is changed here, the entities themselves are not decoded.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The rewritten string, or the unchanged input if nothing matched
     *                or the replacement failed.</p>
     */
    private static function urldecode_unicode_helper(string $str)
    {
        // cheap byte search first, so that the regex engine only runs if it has to
        if (\strpos($str, '%u') === false) {
            return $str;
        }

        // one pass is enough: preg_replace() returns the subject unchanged if nothing matches
        $replaced = \preg_replace('/%u([0-9a-fA-F]{3,4})/', '&#x\\1;', $str);

        // null means a PCRE error: keep the input instead of returning an empty string
        return $replaced ?? $str;
    }
13705
}
13706