Passed
Pull Request — master (#112)
by
unknown
17:39
created

UTF8   F

Complexity

Total Complexity 1756

Size/Duplication

Total Lines 13745
Duplicated Lines 0 %

Test Coverage

Coverage 81.01%

Importance

Changes 110
Bugs 54 Features 6
Metric Value
eloc 4243
dl 0
loc 13745
ccs 3109
cts 3838
cp 0.8101
rs 0.8
c 110
b 54
f 6
wmc 1756

274 Methods

Rating   Name   Duplication   Size   Complexity  
A str_ensure_left() 0 11 3
A single_chr_html_encode() 0 18 4
A encode_mimeheader() 0 26 5
F extract_text() 0 175 34
B chr_to_decimal() 0 38 8
A add_bom_to_string() 0 7 2
A count_chars() 0 11 1
A ctype_loaded() 0 3 1
D chr() 0 107 19
A chunk_split() 0 3 1
A css_identifier() 0 55 6
A css_stripe_media_queries() 0 6 1
A clean() 0 47 6
A __construct() 0 2 1
B between() 0 48 8
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A char_at() 0 7 2
A chars() 0 4 1
A chr_size_list() 0 17 3
A checkForSupport() 0 46 4
A collapse_whitespace() 0 7 2
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A array_change_key_case() 0 23 5
A emoji_decode() 0 21 3
A decode_mimeheader() 0 8 3
A emoji_encode() 0 21 3
A decimal_to_chr() 0 5 1
F encode() 0 144 37
A chr_to_hex() 0 11 3
A emoji_from_country_code() 0 17 3
A str_substr_after_first_separator() 0 28 6
A file_has_bom() 0 8 2
A max() 0 14 3
B str_camelize() 0 74 10
A parse_str() 0 18 4
A filter_input() 0 16 3
A str_contains() 0 15 3
B str_to_lines() 0 28 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A is_bom() 0 10 3
A is_hexadecimal() 0 7 2
A get_unique_string() 0 21 3
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 25 6
A has_uppercase() 0 7 2
A remove_left() 0 28 4
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
C utf8_decode() 0 59 13
A ltrim() 0 26 5
A levenshtein() 0 7 1
A is_utf8() 0 13 4
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 70 14
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A html_escape() 0 6 1
A string() 0 18 4
B str_obfuscate() 0 47 8
D normalize_encoding() 0 147 16
B rxClass() 0 44 8
B get_file_type() 0 60 7
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 170 7
D is_utf16() 0 76 18
C filter() 0 59 14
A normalize_whitespace() 0 11 1
A str_starts_with() 0 16 4
A str_humanize() 0 15 1
A is_html() 0 14 2
C substr_count_in_byte() 0 55 15
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 26 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 35 5
A getData() 0 6 1
B strtolower() 0 58 10
A urldecode() 0 35 4
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 123 27
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
A is_alpha() 0 7 2
C str_titleize() 0 69 12
A str_split_array() 0 17 2
B get_random_string() 0 54 10
A ws() 0 3 1
A str_replace_first() 0 20 2
A fix_utf8() 0 30 4
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A first_char() 0 14 4
A to_boolean() 0 35 5
C stristr() 0 79 17
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A trim() 0 26 5
A is_serialized() 0 11 3
A str_upper_camelize() 0 8 1
A is_uppercase() 0 7 2
A substr_compare() 0 33 6
A convertMbAscii() 0 19 4
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 12 3
A str_ireplace() 0 31 5
A str_replace_ending() 0 24 6
A string_has_bom() 0 9 3
B strtr() 0 41 11
B str_contains_all() 0 22 9
A is_ascii() 0 3 1
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 71 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A rawurldecode() 0 35 4
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 14 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 14
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A is_blank() 0 7 2
A str_replace() 0 18 1
A substr_iright() 0 15 4
D getCharDirection() 0 104 117
A htmlspecialchars() 0 15 3
A replace() 0 11 2
A filter_var_array() 0 15 2
A to_iso8859() 0 16 4
A has_whitespace() 0 7 2
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 134 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
F strrpos() 0 136 31
A remove_right() 0 25 4
A remove_html_breaks() 0 3 1
A showSupport() 0 16 3
A remove_invisible_characters() 0 11 1
A str_replace_last() 0 19 2
A str_substr_before_last_separator() 0 31 6
B is_binary() 0 39 10
A intlChar_loaded() 0 3 1
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
A tabs_to_spaces() 0 11 3
B is_url() 0 40 7
A finfo_loaded() 0 3 1
B str_truncate() 0 43 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 16 4
A fits_inside() 0 3 1
A to_ascii() 0 6 1
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 12 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A html_stripe_empty_tags() 0 6 1
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
A json_loaded() 0 3 1
B str_snakeize() 0 57 6
A is_lowercase() 0 7 2
A str_sort() 0 16 3
A to_utf8() 0 15 3
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 51 11
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A swapCase() 0 17 4
A filter_var() 0 15 2
A substr_ileft() 0 15 4
A is_empty() 0 3 1
B html_encode() 0 54 11
A str_dasherize() 0 3 1
D is_utf32() 0 76 18
C ord() 0 68 16
B to_string() 0 27 8
A is_alphanumeric() 0 7 2
A strtonatfold() 0 11 2
A json_decode() 0 17 3
C strcspn() 0 48 12
A fix_simple_utf8() 0 32 5
B is_json() 0 26 8
A is_printable() 0 3 1
A fixStrCaseHelper() 0 41 5
A int_to_hex() 0 7 2
C str_split_pattern() 0 54 13
D strstr() 0 107 21
A has_lowercase() 0 7 2
A json_encode() 0 13 3
A str_isubstr_first() 0 25 4
A is_base64() 0 17 5
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A hex_to_int() 0 14 3
A hex_to_chr() 0 4 1
A htmlentities() 0 28 3
A str_substr_before_first_separator() 0 32 6
F substr() 0 137 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A filter_input_array() 0 15 3
A str_insert() 0 28 4
A getSupportInfo() 0 13 3
A replace_diamond_question_mark() 0 43 5
D is_utf8_string() 0 133 28
B str_delimit() 0 31 8
A to_utf8_convert_helper() 0 28 5
B strtoupper() 0 58 10
A min() 0 14 3
A is_punctuation() 0 3 1
C html_entity_decode() 0 58 13
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 28 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 151 33
A str_shuffle() 0 33 6
A strcmp() 0 11 2
C file_get_contents() 0 60 12
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * Bom => Byte-Length
14
     *
15
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
16
     *
17
     * @var array<string, int>
18
     */
19
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // The three BOM bytes read as single-byte characters and re-encoded as UTF-8
        // (written as escapes so the key survives editors/tools that strip or re-encode it).
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
31
32
    /**
33
     * Numeric code point => UTF-8 Character
34
     *
35
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
36
     *
37
     * @var array<int, string>
38
     */
39
    private static $WHITESPACE = [
40
        // NULL Byte
41
        0 => "\x0",
42
        // Tab
43
        9 => "\x9",
44
        // New Line
45
        10 => "\xa",
46
        // Vertical Tab
47
        11 => "\xb",
48
        // Carriage Return
49
        13 => "\xd",
50
        // Ordinary Space
51
        32 => "\x20",
52
        // NO-BREAK SPACE
53
        160 => "\xc2\xa0",
54
        // OGHAM SPACE MARK
55
        5760 => "\xe1\x9a\x80",
56
        // MONGOLIAN VOWEL SEPARATOR
57
        6158 => "\xe1\xa0\x8e",
58
        // EN QUAD
59
        8192 => "\xe2\x80\x80",
60
        // EM QUAD
61
        8193 => "\xe2\x80\x81",
62
        // EN SPACE
63
        8194 => "\xe2\x80\x82",
64
        // EM SPACE
65
        8195 => "\xe2\x80\x83",
66
        // THREE-PER-EM SPACE
67
        8196 => "\xe2\x80\x84",
68
        // FOUR-PER-EM SPACE
69
        8197 => "\xe2\x80\x85",
70
        // SIX-PER-EM SPACE
71
        8198 => "\xe2\x80\x86",
72
        // FIGURE SPACE
73
        8199 => "\xe2\x80\x87",
74
        // PUNCTUATION SPACE
75
        8200 => "\xe2\x80\x88",
76
        // THIN SPACE
77
        8201 => "\xe2\x80\x89",
78
        // HAIR SPACE
79
        8202 => "\xe2\x80\x8a",
80
        // LINE SEPARATOR
81
        8232 => "\xe2\x80\xa8",
82
        // PARAGRAPH SEPARATOR
83
        8233 => "\xe2\x80\xa9",
84
        // NARROW NO-BREAK SPACE
85
        8239 => "\xe2\x80\xaf",
86
        // MEDIUM MATHEMATICAL SPACE
87
        8287 => "\xe2\x81\x9f",
88
        // HALFWIDTH HANGUL FILLER
89
        65440 => "\xef\xbe\xa0",
90
        // IDEOGRAPHIC SPACE
91
        12288 => "\xe3\x80\x80",
92
    ];
93
94
    /**
95
     * @var array<string, string>
96
     */
97
    private static $WHITESPACE_TABLE = [
98
        'SPACE'                     => "\x20",
99
        'NO-BREAK SPACE'            => "\xc2\xa0",
100
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
101
        'EN QUAD'                   => "\xe2\x80\x80",
102
        'EM QUAD'                   => "\xe2\x80\x81",
103
        'EN SPACE'                  => "\xe2\x80\x82",
104
        'EM SPACE'                  => "\xe2\x80\x83",
105
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
106
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
107
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
108
        'FIGURE SPACE'              => "\xe2\x80\x87",
109
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
110
        'THIN SPACE'                => "\xe2\x80\x89",
111
        'HAIR SPACE'                => "\xe2\x80\x8a",
112
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
113
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
114
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
115
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
116
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
117
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
118
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
119
    ];
120
121
    /**
122
     * @var array
123
     *
124
     * @phpstan-var array{upper: string[], lower: string[]}
125
     */
126
    private static $COMMON_CASE_FOLD = [
127
        'upper' => [
128
            'µ',
129
            'ſ',
130
            "\xCD\x85",
131
            'ς',
132
            'ẞ',
133
            "\xCF\x90",
134
            "\xCF\x91",
135
            "\xCF\x95",
136
            "\xCF\x96",
137
            "\xCF\xB0",
138
            "\xCF\xB1",
139
            "\xCF\xB5",
140
            "\xE1\xBA\x9B",
141
            "\xE1\xBE\xBE",
142
        ],
143
        'lower' => [
144
            'μ',
145
            's',
146
            'ι',
147
            'σ',
148
            'ß',
149
            'β',
150
            'θ',
151
            'φ',
152
            'π',
153
            'κ',
154
            'ρ',
155
            'ε',
156
            "\xE1\xB9\xA1",
157
            'ι',
158
        ],
159
    ];
160
161
    /**
162
     * @var array
163
     *
164
     * @phpstan-var array<string, mixed>
165
     */
166
    private static $SUPPORT = [];
167
168
    /**
169
     * @var string[]|null
170
     *
171
     * @phpstan-var array<string, string>|null
172
     */
173
    private static $BROKEN_UTF8_FIX;
174
175
    /**
176
     * @var string[]|null
177
     *
178
     * @phpstan-var array<int, string>|null
179
     */
180
    private static $WIN1252_TO_UTF8;
181
182
    /**
183
     * @var string[]|null
184
     *
185
     * @phpstan-var array<int ,string>|null
186
     */
187
    private static $INTL_TRANSLITERATOR_LIST;
188
189
    /**
190
     * @var string[]|null
191
     *
192
     * @phpstan-var array<string>|null
193
     */
194
    private static $ENCODINGS;
195
196
    /**
197
     * @var int[]|null
198
     *
199
     * @phpstan-var array<string ,int>|null
200
     */
201
    private static $ORD;
202
203
    /**
204
     * @var string[]|null
205
     *
206
     * @phpstan-var array<string, string>|null
207
     */
208
    private static $EMOJI;
209
210
    /**
211
     * @var string[]|null
212
     *
213
     * @phpstan-var array<string>|null
214
     */
215
    private static $EMOJI_VALUES_CACHE;
216
217
    /**
218
     * @var string[]|null
219
     *
220
     * @phpstan-var array<string>|null
221
     */
222
    private static $EMOJI_KEYS_CACHE;
223
224
    /**
225
     * @var string[]|null
226
     *
227
     * @phpstan-var array<string>|null
228
     */
229
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
230
231
    /**
232
     * @var string[]|null
233
     *
234
     * @phpstan-var array<int, string>|null
235
     */
236
    private static $CHR;
237
238
    /**
239
     * __construct()
240
     */
241 34
    public function __construct()
    {
        // Intentionally empty: there is nothing to initialise on an instance.
    }
244
245
    /**
246
     * Return the character at the specified position: $str[1] like functionality.
247
     *
248
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
249
     *
250
     * @param string $str      <p>A UTF-8 string.</p>
251
     * @param int    $pos      <p>The position of character to return.</p>
252
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
253
     *
254
     * @psalm-pure
255
     *
256
     * @return string
257
     *                <p>Single multi-byte character.</p>
258
     */
259 3
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // Negative positions and empty input never yield a character here.
        if ($pos < 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring; every other charset is delegated to self::substr().
        return $encoding === 'UTF-8'
            ? (string) \mb_substr($str, $pos, 1)
            : (string) self::substr($str, $pos, 1, $encoding);
    }
271
272
    /**
273
     * Prepends UTF-8 BOM character to the string and returns the whole string.
274
     *
275
     * INFO: If BOM already existed there, the Input string is returned.
276
     *
277
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
278
     *
279
     * @param string $str <p>The input string.</p>
280
     *
281
     * @psalm-pure
282
     *
283
     * @return string
284
     *                <p>The output string that contains BOM.</p>
285
     */
286 2
    public static function add_bom_to_string(string $str): string
    {
        // A string that already carries a BOM is handed back untouched.
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
294
295
    /**
296
     * Changes all keys in an array.
297
     *
298
     * @param array<string, mixed> $array    <p>The array to work on</p>
299
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
300
     *                                       or <strong>CASE_LOWER</strong> (default)</p>
301
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
302
     *
303
     * @psalm-pure
304
     *
305
     * @return array<string, mixed>
306
     *                  <p>An array with its keys lower- or uppercased.</p>
307
     */
308 2
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Any value other than CASE_UPPER / CASE_LOWER silently falls back to lower-casing.
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // Iterate by value: the values are only copied, so a by-reference loop
        // would just leave a dangling reference behind without any benefit.
        foreach ($array as $key => $value) {
            // PHP turns numeric-string keys into ints; cast back so the string
            // helpers always receive a string (the file runs with strict_types).
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            // Keys that collide after case conversion keep the last value seen.
            $return[$key] = $value;
        }

        return $return;
    }
332
333
    /**
334
     * Returns the substring between $start and $end, if found, or an empty
335
     * string. An optional offset may be supplied from which to begin the
336
     * search for the start string.
337
     *
338
     * @param string $str
339
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
340
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
341
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
342
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
343
     *
344
     * @psalm-pure
345
     *
346
     * @return string
347
     */
348 16
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // Generic path: normalise the charset name and use the class' own helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $open_at = self::strpos($str, $start, $offset, $encoding);
            if ($open_at === false) {
                return '';
            }

            // First character after the start delimiter.
            $content_at = $open_at + (int) self::strlen($start, $encoding);
            $close_at = self::strpos($str, $end, $content_at, $encoding);

            // No end delimiter, or nothing between the two delimiters.
            if ($close_at === false || $close_at === $content_at) {
                return '';
            }

            return (string) self::substr(
                $str,
                $content_at,
                $close_at - $content_at,
                $encoding
            );
        }

        // UTF-8 path: talk to mbstring directly.
        $open_at = \mb_strpos($str, $start, $offset);
        if ($open_at === false) {
            return '';
        }

        // First character after the start delimiter.
        $content_at = $open_at + (int) \mb_strlen($start);
        $close_at = \mb_strpos($str, $end, $content_at);

        // No end delimiter, or nothing between the two delimiters.
        if ($close_at === false || $close_at === $content_at) {
            return '';
        }

        return (string) \mb_substr($str, $content_at, $close_at - $content_at);
    }
398
399
    /**
400
     * Convert binary into a string.
401
     *
402
     * INFO: opposite to UTF8::str_to_binary()
403
     *
404
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
405
     *
406
     * @param string $bin 1|0
407
     *
408
     * @psalm-pure
409
     *
410
     * @return string
411
     */
412 2
    public static function binary_to_str($bin): string
    {
        // Anything that is not a non-empty string cannot hold bits.
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // Only "0" and "1" carry information; other characters are ignored
        // (base_convert(), used previously, ignored them as well).
        $bits = (string) \preg_replace('/[^01]+/', '', $bin);

        // Leading zero bits are insignificant, and an all-zero input yields an empty string.
        $bits = \ltrim($bits, '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad to whole bytes so a value such as "101" becomes "\x05".
        // Going through a hex string with an odd digit count would shift the nibble.
        $remainder = \strlen($bits) % 8;
        if ($remainder !== 0) {
            $bits = \str_repeat('0', 8 - $remainder) . $bits;
        }

        // Convert byte by byte instead of as one big number: a single numeric
        // conversion loses precision once the input exceeds what a float can hold.
        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
425
426
    /**
427
     * Returns the UTF-8 Byte Order Mark Character.
428
     *
429
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
430
     *
431
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
432
     *
433
     * @psalm-pure
434
     *
435
     * @return string
436
     *                <p>UTF-8 Byte Order Mark.</p>
437
     */
438 4
    public static function bom(): string
    {
        // The three bytes EF BB BF, i.e. the UTF-8 byte order mark.
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
442
443
    /**
444
     * @alias of UTF8::chr_map()
445
     *
446
     * @param callable $callback
447
     * @param string   $str
448
     *
449
     * @psalm-pure
450
     *
451
     * @return string[]
452
     *
453
     * @see   UTF8::chr_map()
454
     */
455 2
    public static function callback($callback, string $str): array
    {
        // Pure alias: all the work happens in chr_map().
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
459
460
    /**
461
     * Returns the character at $index, with indexes starting at 0.
462
     *
463
     * @param string $str      <p>The input string.</p>
464
     * @param int    $index    <p>Position of the character.</p>
465
     * @param string $encoding [optional] <p>Default is UTF-8</p>
466
     *
467
     * @psalm-pure
468
     *
469
     * @return string
470
     *                <p>The character at $index.</p>
471
     */
472 9
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Non-UTF-8 charsets are delegated to self::substr(); UTF-8 goes straight to mbstring.
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $index, 1, $encoding);
        }

        return (string) \mb_substr($str, $index, 1);
    }
480
481
    /**
482
     * Returns an array consisting of the characters in the string.
483
     *
484
     * @param string $str <p>The input string.</p>
485
     *
486
     * @psalm-pure
487
     *
488
     * @return string[]
489
     *                  <p>An array of chars.</p>
490
     */
491 4
    public static function chars(string $str): array
    {
        // str_split() is called with its defaults here; the annotation narrows
        // the result for static analysis.
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
496
497
    /**
498
     * This method will auto-detect your server environment for UTF-8 support.
499
     *
500
     * @return true|null
501
     *
502
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
503
     */
504 4
    public static function checkForSupport()
    {
        // The probe runs only once; every later call is a no-op that returns null.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

        if (self::$SUPPORT['mbstring'] === true) {
            // Make the "mb_*" functions default to UTF-8.
            \mb_internal_encoding('UTF-8');
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();

        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            // The polyfill provides mb_internal_encoding() as well.
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
551
552
    /**
553
     * Generates a UTF-8 encoded character from the given code point.
554
     *
555
     * INFO: opposite to UTF8::ord()
556
     *
557
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
558
     *
559
     * @param int    $code_point <p>The code point for which to generate a character.</p>
560
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
561
     *
562
     * @psalm-pure
563
     *
564
     * @return string|null
565
     *                     <p>Multi-byte character, returns null on failure or empty input.</p>
566
     */
567 21
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // Memo of already generated characters, keyed by "<code point>_<encoding>".
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Reject anything that is not a positive integer inside the Unicode
        // range (U+0001 .. U+10FFFF); larger values cannot be encoded as UTF-8.
        if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Single-byte fast path. Only U+0001 .. U+007F are one byte in UTF-8;
        // U+0080 already needs two bytes ("\xC2\x80"), so it must not be served
        // from the byte table used here.
        if ($code_point < 0x80) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // Assemble the UTF-8 byte sequence by hand; the table is indexed by byte value.
        if ($code_point <= 0x7FF) {
            // two bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
675
676
    /**
677
     * Applies callback to all characters of a string.
678
     *
679
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
680
     *
681
     * @param callable $callback <p>The callback function.</p>
682
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
683
     *
684
     * @psalm-pure
685
     *
686
     * @return string[]
687
     *                  <p>The outcome of the callback, as array.</p>
688
     */
689 2
    public static function chr_map($callback, string $str): array
    {
        // Split into single characters first, then run the callback over each of them.
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
696
697
    /**
698
     * Generates an array of byte length of each character of a Unicode string.
699
     *
700
     * 1 byte => U+0000  - U+007F
701
     * 2 byte => U+0080  - U+07FF
702
     * 3 byte => U+0800  - U+FFFF
703
     * 4 byte => U+10000 - U+10FFFF
704
     *
705
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
706
     *
707
     * @param string $str <p>The original unicode string.</p>
708
     *
709
     * @psalm-pure
710
     *
711
     * @return int[]
712
     *               <p>An array of byte lengths of each character.</p>
713
     */
714 4
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // Regular setup: the native strlen() counts bytes.
            return \array_map('\strlen', self::str_split($str));
        }

        // With function overloading the plain strlen() cannot be used to count bytes;
        // "mb_" is available if overload is used, so use it with an 8-bit charset.
        $byte_length = static function (string $data): int {
            return \mb_strlen($data, 'CP850'); // 8-BIT
        };

        return \array_map($byte_length, self::str_split($str));
    }
732
733
    /**
734
     * Get a decimal code representation of a specific character.
735
     *
736
     * INFO: opposite to UTF8::decimal_to_chr()
737
     *
738
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
739
     *
740
     * @param string $char <p>The input character.</p>
741
     *
742
     * @psalm-pure
743
     *
744
     * @return int
745
     */
746 5
    public static function chr_to_decimal(string $char): int
    {
        // An empty input carries no code point. Without this guard unpack()
        // fails on the empty iconv() result and the manual fallback below
        // reads $char[0] from an empty string.
        if ($char === '') {
            return 0;
        }

        // Fast path: let iconv produce the little-endian 32-bit code unit
        // and read it as an unsigned integer.
        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                $unpacked = \unpack('V', $chr_tmp);
                // unpack() reports failure (e.g. fewer than 4 input bytes)
                // with false; in that case fall through to manual decoding.
                if ($unpacked !== false && isset($unpacked[1])) {
                    return $unpacked[1];
                }
            }
        }

        // Manual decoding: inspect the lead byte to find the sequence length.
        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // Append the payload bits of every continuation byte.
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
785
786
    /**
787
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
788
     *
789
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
790
     *
791
     * @param int|string $char   <p>The input character</p>
792
     * @param string     $prefix [optional]
793
     *
794
     * @psalm-pure
795
     *
796
     * @return string
797
     *                <p>The code point encoded as U+xxxx.</p>
798
     */
799 2
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        // Nothing to convert.
        if ($char === '') {
            return '';
        }

        // The HTML entity of the NUL character is treated like an empty character.
        $character = $char === '&#0;' ? '' : (string) $char;

        $code_point = self::ord($character);

        return self::int_to_hex($code_point, $prefix);
    }
811
812
    /**
813
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
814
     *
815
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
816
     *
817
     * @param string $body         <p>The original string to be split.</p>
818
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
819
     * @param string $end          [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
820
     *
821
     * @psalm-pure
822
     *
823
     * @return string
824
     *                <p>The chunked string.</p>
825
     */
826 4
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        // Cut the body into pieces of at most $chunk_length characters ...
        $chunks = self::str_split($body, $chunk_length);

        // ... and join them again; implode() places $end between the chunks only.
        return \implode($end, $chunks);
    }
830
831
    /**
832
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
833
     *
834
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
835
     *
836
     * @param string $str                                     <p>The string to be sanitized.</p>
837
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
838
     *                                                        UTF-BOM.</p>
839
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
840
     *                                                        whitespace.</p>
841
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
842
     *                                                        Word chars e.g.: "…"
843
     *                                                        => "..."</p>
844
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
845
     *                                                        in
846
     *                                                        combination with
847
     *                                                        $normalize_whitespace</p>
848
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
849
     *                                                        question mark e.g.: "�"</p>
850
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
851
     *                                                        invisible characters e.g.: "\0"</p>
852
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you want to remove
853
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
854
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
855
     *                                                        </p>
856
     *
857
     * @psalm-pure
858
     *
859
     * @return string
860
     *                <p>A clean UTF-8 encoded string.</p>
861
     */
862 94
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings
        //
        // Group 1 captures runs of well-formed byte sequences; the other two
        // groups match stray bytes. Replacing every match with "$1" therefore
        // keeps the valid runs and drops the stray bytes.
        $byte_sequence_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $str = (string) \preg_replace($byte_sequence_pattern, '$1', $str);

        // The optional post-processing steps run in this fixed order.
        if ($replace_diamond_question_mark) {
            $str = self::replace_diamond_question_mark($str);
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom) {
            $str = self::remove_bom($str);
        }

        return $str;
    }
910
911
    /**
912
     * Clean-up a string and show only printable UTF-8 chars at the end  + fix UTF-8 encoding.
913
     *
914
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
915
     *
916
     * @param string $str <p>The input string.</p>
917
     *
918
     * @psalm-pure
919
     *
920
     * @return string
921
     */
922 33
    public static function cleanup($str): string
    {
        // The parameter is untyped, so normalize it to a string first.
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // Flags for clean(), named here because they are passed positionally.
        // The remaining clean() parameters keep their defaults.
        $remove_bom = true;
        $normalize_whitespace = true;
        $normalize_msword = false;
        $keep_non_breaking_space = true;
        $replace_diamond_question_mark = true;

        return self::clean(
            $fixed,
            $remove_bom,
            $normalize_whitespace,
            $normalize_msword,
            $keep_non_breaking_space,
            $replace_diamond_question_mark
        );
    }
948
949
    /**
950
     * Accepts a string or a array of strings and returns an array of Unicode code points.
951
     *
952
     * INFO: opposite to UTF8::string()
953
     *
954
     * EXAMPLE: <code>
955
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
956
     * // ... OR ...
957
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
958
     * </code>
959
     *
960
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
961
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
962
     *                                     default, code points will be returned as integers.</p>
963
     *
964
     * @psalm-pure
965
     *
966
     * @return int[]|string[]
967
     *                        <p>
968
     *                        The array of code points:<br>
969
     *                        int[] for $u_style === false<br>
970
     *                        string[] for $u_style === true<br>
971
     *                        </p>
972
     */
973 12
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A plain string is split into its characters first.
        if (\is_string($arg)) {
            $arg = self::str_split($arg);
        }

        /**
         * Anything that is neither a string nor an array, as well as an
         * empty list, yields no code points.
         *
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($arg) || $arg === []) {
            return [];
        }

        // Map every character to its integer code point.
        $code_points = \array_map([self::class, 'ord'], $arg);

        if (!$use_u_style) {
            return $code_points;
        }

        // Convert the integers with int_to_hex() (default prefix).
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1010
1011
    /**
1012
     * Trims the string and replaces consecutive whitespace characters with a
1013
     * single space. This includes tabs and newline characters, as well as
1014
     * multibyte whitespace such as the thin space and ideographic space.
1015
     *
1016
     * @param string $str <p>The input string.</p>
1017
     *
1018
     * @psalm-pure
1019
     *
1020
     * @return string
1021
     *                <p>A string with trimmed $str and condensed whitespace.</p>
1022
     */
1023 13
    public static function collapse_whitespace(string $str): string
    {
        $whitespace_run = '[[:space:]]+';

        // Prefer mbstring's regex engine when it is available, otherwise use
        // the library's own regex_replace().
        if (self::$SUPPORT['mbstring'] === true) {
            $collapsed = (string) \mb_ereg_replace($whitespace_run, ' ', $str);
        } else {
            $collapsed = self::regex_replace($str, $whitespace_run, ' ');
        }

        return \trim($collapsed);
    }
1031
1032
    /**
1033
     * Returns count of characters used in a string.
1034
     *
1035
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
1036
     *
1037
     * @param string $str                     <p>The input string.</p>
1038
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
1039
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
1040
     *
1041
     * @psalm-pure
1042
     *
1043
     * @return int[]
1044
     *               <p>An associative array of Character as keys and
1045
     *               their count as values.</p>
1046
     */
1047 25
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split into chunks of one character each ...
        $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // ... and count how often every distinct character occurs.
        return \array_count_values($characters);
    }
1061
1062
    /**
1063
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
1064
     *
1065
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
1066
     *
1067
     * copy&past from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
1068
     *
1069
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
1070
     * @param string[] $filter
1071
     * @param bool     $strip_tags
1072
     * @param bool     $strtolower
1073
     *
1074
     * @psalm-pure
1075
     *
1076
     * @return string
1077
     *
1078
     * @phpstan-param array<string,string> $filter
1079
     */
1080 1
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank identifier is replaced by a generated unique
        // string, any other input is cleaned first.
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // str_replace() is used instead of strtr(), which the original
        // authors note as being much slower. To keep '__' intact, it is
        // swapped for the placeholder '##' before the filter runs, unless
        // the filter itself defines a rule for '__'.
        $protected_underscores = 0;
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $protected_underscores);
        }

        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Restore the placeholder only if a '__' was actually protected above.
        if ($protected_underscores > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Characters kept by the pattern below:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - characters from U+00A1 up to U+FFFF
        // Every other character is stripped.
        $str = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $str);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $str = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $str);

        // Finally drop leading and trailing hyphens.
        return \trim($str, '-');
    }
1136
1137
    /**
1138
     * Remove css media-queries.
1139
     *
1140
     * @param string $str
1141
     *
1142
     * @psalm-pure
1143
     *
1144
     * @return string
1145
     */
1146 1
    public static function css_stripe_media_queries(string $str): string
    {
        // Pattern for a complete "@media ... { ... }" rule.
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        // Remove every match from the input.
        $stripped = \preg_replace($media_query_pattern, '', $str);

        // preg_replace() yields null on failure; the cast turns that into ''.
        return (string) $stripped;
    }
1154
1155
    /**
1156
     * Checks whether ctype is available on the server.
1157
     *
1158
     * @psalm-pure
1159
     *
1160
     * @return bool
1161
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
1162
     *
1163
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
1164
     */
1165
    public static function ctype_loaded(): bool
    {
        // Ask the engine whether the "ctype" extension is loaded.
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1169
1170
    /**
1171
     * Converts an int value into a UTF-8 character.
1172
     *
1173
     * INFO: opposite to UTF8::chr_to_decimal()
1174
     *
1175
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
1176
     *
1177
     * @param int|string $int
1178
     *
1179
     * @phpstan-param int|numeric-string $int
1180
     *
1181
     * @psalm-pure
1182
     *
1183
     * @return string
1184
     */
1185 20
    public static function decimal_to_chr($int): string
    {
        // The value is wrapped into a numeric HTML entity ("&#<int>;") and
        // decoded to UTF-8.
        //
        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        //
        // The explicit cast guards the declared "string" return type, since
        // mb_convert_encoding() is not guaranteed to return a string.
        //
        // NOTE(review): the "HTML-ENTITIES" pseudo-encoding is deprecated as
        // of PHP 8.2 - a replacement will be needed eventually.
        return (string) \mb_convert_encoding('&#' . $int . ';', 'UTF-8', 'HTML-ENTITIES');
    }
1191
1192
    /**
1193
     * Decodes a MIME header field
1194
     *
1195
     * @param string $str
1196
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1197
     *
1198
     * @psalm-pure
1199
     *
1200
     * @return false|string
1201
     *                      <p>A decoded MIME field on success,
1202
     *                      or false if an error occurs during the decoding.</p>
1203
     */
1204 2
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // The two most common charsets are used as given; every other name
        // is normalized first (falling back to UTF-8).
        $is_common_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_common_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        $decoded = \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);

        return $decoded;
    }
1213
1214
    /**
1215
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
1216
     *
1217
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
1218
     *
1219
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
1220
     *
1221
     * @return string
1222
     *                <p>Emoji or empty string on error.</p>
1223
     */
1224 1
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // A usable code consists of exactly two ASCII letters. Checking only
        // the character count would let digits, punctuation or multibyte
        // characters through; those would be byte-indexed below and mapped
        // to code points outside the regional-indicator range instead of
        // yielding the documented empty string.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        // U+1F1E6 is the regional indicator symbol for "A" and 0x41 is ASCII
        // "A", so each letter is shifted by the same distance.
        $flagOffset = 0x1F1E6;
        $asciiOffset = 0x41;

        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1242
1243
    /**
1244
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1245
     *
1246
     * INFO: opposite to UTF8::emoji_encode()
1247
     *
1248
     * EXAMPLE: <code>
1249
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
1250
     * //
1251
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
1252
     * </code>
1253
     *
1254
     * @param string $str                            <p>The input string.</p>
1255
     * @param bool   $use_reversible_string_mappings [optional] <p>
1256
     *                                               When <b>TRUE</b>, we se a reversible string mapping
1257
     *                                               between "emoji_encode" and "emoji_decode".</p>
1258
     *
1259
     * @psalm-pure
1260
     *
1261
     * @return string
1262
     */
1263 9
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Load the emoji tables on first use.
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Choose which set of encoded keys is searched for.
        $encoded_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        // Swap every encoded key for its emoji value.
        return (string) \str_replace(
            $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1286
1287
    /**
1288
     * Encode a string with emoji chars into a non-emoji string.
1289
     *
1290
     * INFO: opposite to UTF8::emoji_decode()
1291
     *
1292
     * EXAMPLE: <code>
1293
     * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
1294
     * //
1295
     * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
1296
     * </code>
1297
     *
1298
     * @param string $str                            <p>The input string</p>
1299
     * @param bool   $use_reversible_string_mappings [optional] <p>
1300
     *                                               when <b>TRUE</b>, we use a reversible string mapping
1301
     *                                               between "emoji_encode" and "emoji_decode"</p>
1302
     *
1303
     * @psalm-pure
1304
     *
1305
     * @return string
1306
     */
1307 12
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Load the emoji tables on first use.
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Choose which set of keys the emoji values are replaced with.
        $replacement_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        // Swap every emoji value for its key.
        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            $replacement_keys,
            $str
        );
    }
1330
1331
    /**
1332
     * Encode a string with a new charset-encoding.
1333
     *
1334
     * INFO:  This function will also try to fix broken / double encoding,
1335
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
1336
     *
1337
     * EXAMPLE: <code>
1338
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
1339
     * //
1340
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
1341
     * //
1342
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
1343
     * //
1344
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
1345
     * </code>
1346
     *
1347
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1348
     * @param string $str                           <p>The input string</p>
1349
     * @param bool   $auto_detect_the_from_encoding [optional] <p>Force the new encoding (we try to fix broken / double
1350
     *                                              encoding for UTF-8)<br> otherwise we auto-detect the current
1351
     *                                              string-encoding</p>
1352
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1353
     *                                              A empty string will trigger the autodetect anyway.</p>
1354
     *
1355
     * @psalm-pure
1356
     *
1357
     * @return string
1358
     *
1359
     * @psalm-suppress InvalidReturnStatement
1360
     */
1361 28
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // Nothing to convert, or no target encoding given.
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // Normalize both encoding names; the two most common values are
        // used as given.
        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Explicitly given source and target are identical: nothing to do.
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // Pseudo-encodings. For each of them the "to" case returns at once,
        // while the "from" case decodes the input and clears $from_encoding
        // so that the real source encoding is detected further down.
        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // NOTE(review): in strict mode base64_decode() returns false for
            // invalid input; that value is passed on unchanged here.
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // Detect the source encoding when requested, or when none is known.
        $detected_encoding = false;
        if ($auto_detect_the_from_encoding || !$from_encoding) {
            $detected_encoding = self::str_detect_encoding($str);
        }

        if ($detected_encoding !== false) {
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        // Still no source encoding, or it already equals the target.
        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // Dedicated converters for the most common pairs.
        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // Warn when another target is requested while mbstring is missing.
        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        // Generic conversion: try mbstring first, then iconv.
        if (self::$SUPPORT['mbstring'] === true) {
            $converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // Only a truthy result is accepted; otherwise iconv is tried.
            if ($converted) {
                \assert(\is_string($converted));

                return $converted;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $converted = @\iconv($from_encoding, $to_encoding, $str);

        // If iconv fails as well, the input is returned as it is.
        return $converted !== false ? $converted : $str;
    }
1506
1507
    /**
1508
     * @param string $str
1509
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
1510
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
1511
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
1512
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
1513
     * @param int    $indent            [optional] <p>Set the max length indent.</p>
1514
     *
1515
     * @psalm-pure
1516
     *
1517
     * @return false|string
1518
     *                      <p>An encoded MIME field on success,
1519
     *                      or false if an error occurs during the encoding.</p>
1520
     */
1521 1
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // Charset names other than the two common ones are normalized first
        // (falling back to UTF-8).
        if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        // Map the arguments onto the option names of iconv_mime_encode().
        $options = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        // The header field name is passed as an empty string.
        return \iconv_mime_encode('', $str, $options);
    }
1550
1551
    /**
     * Create an extract from a sentence; if the search-string was found, the extract
     * is taken from around its first (case-insensitive) occurrence.
     *
     * Cut points are looked up at the next " " or "." character, skipped text is
     * marked with $replacer_for_skipped_text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 input uses the native "mb_*" functions, every other encoding the class wrappers
        $is_utf8 = $encoding === 'UTF-8';

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // -----------------------------------------------------------------
        // no search-string: return the beginning of the text
        // -----------------------------------------------------------------

        if ($search === '') {
            $scan_from = 0;
            if ($length > 0) {
                $total_chars = $is_utf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
                $scan_from = \min($length - 1, $total_chars);
            }

            // NOTE: "min()" yields false as soon as one of the lookups fails, which ends up as 0 here
            if ($is_utf8) {
                $cut_at = (int) \min(
                    \mb_strpos($str, ' ', $scan_from),
                    \mb_strpos($str, '.', $scan_from)
                );
            } else {
                $cut_at = (int) \min(
                    self::strpos($str, ' ', $scan_from, $encoding),
                    self::strpos($str, '.', $scan_from, $encoding)
                );
            }

            if (!$cut_at) {
                return $str;
            }

            $head = $is_utf8
                ? \mb_substr($str, 0, $cut_at)
                : self::substr($str, 0, $cut_at, $encoding);

            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trim_chars) . $replacer_for_skipped_text;
        }

        // -----------------------------------------------------------------
        // search-string given: locate it and the start of the window around it
        // -----------------------------------------------------------------

        if ($is_utf8) {
            $match_at = (int) \mb_stripos($str, $search);
            $window_start = (int) ($match_at - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $match_at = (int) self::stripos($str, $search, 0, $encoding);
            $window_start = (int) ($match_at - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // move the start back to the last " " or "." in front of the window
        $pos_start = 0;
        if ($window_start > 0) {
            $leading_text = $is_utf8
                ? \mb_substr($str, 0, $window_start)
                : self::substr($str, 0, $window_start, $encoding);

            if ($leading_text !== false) {
                if ($is_utf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($leading_text, ' '),
                        \mb_strrpos($leading_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($leading_text, ' ', 0, $encoding),
                        self::strrpos($leading_text, '.', 0, $encoding)
                    );
                }
            }
        }

        // -----------------------------------------------------------------
        // nothing to skip at the beginning: extract from the start of the text
        // -----------------------------------------------------------------

        if (!$match_at || $window_start <= 0) {
            $scan_from = \min($length - 1, (int) self::strlen($str, $encoding));

            if ($is_utf8) {
                $cut_at = (int) \min(
                    \mb_strpos($str, ' ', $scan_from),
                    \mb_strpos($str, '.', $scan_from)
                );
            } else {
                $cut_at = (int) \min(
                    self::strpos($str, ' ', $scan_from, $encoding),
                    self::strpos($str, '.', $scan_from, $encoding)
                );
            }

            if (!$cut_at) {
                return $str;
            }

            $head = $is_utf8
                ? \mb_substr($str, 0, $cut_at)
                : self::substr($str, 0, $cut_at, $encoding);

            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trim_chars) . $replacer_for_skipped_text;
        }

        // -----------------------------------------------------------------
        // extract from the middle (or the end) of the text
        // -----------------------------------------------------------------

        $scan_from = \min($pos_start + $length - 1, (int) self::strlen($str, $encoding));

        if ($is_utf8) {
            $extract_length = (int) \min(
                \mb_strpos($str, ' ', $scan_from),
                \mb_strpos($str, '.', $scan_from)
            ) - $pos_start;
        } else {
            $extract_length = (int) \min(
                self::strpos($str, ' ', $scan_from, $encoding),
                self::strpos($str, '.', $scan_from, $encoding)
            ) - $pos_start;
        }

        if ($extract_length <= 0) {
            // no usable end position: take everything up to the end of the text
            $tail = $is_utf8
                ? \mb_substr($str, $pos_start, (int) \mb_strlen($str))
                : self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);

            if ($tail === false) {
                return '';
            }

            return $replacer_for_skipped_text . \ltrim($tail, $trim_chars);
        }

        $middle = $is_utf8
            ? \mb_substr($str, $pos_start, $extract_length)
            : self::substr($str, $pos_start, $extract_length, $encoding);

        if ($middle === false) {
            return '';
        }

        return $replacer_for_skipped_text . \trim($middle, $trim_chars) . $replacer_for_skipped_text;
    }
1740
1741
    /**
     * Reads entire file into a string.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded to "file_get_contents()", used to trigger
     *                                        the include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If it is null and $timeout is
     *                                        not 0, a context with a "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts, null is handled as 0.
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read, negative values are handled
     *                                        as 0. The default is to read until end of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
        $filename = Bootup::filter_sanitize_string_polyfill($filename);
        if ($filename === false) {
            return false;
        }

        // only build a default context if the caller did not provide one
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        $data = \is_int($max_length)
            ? \file_get_contents($filename, $use_include_path, $context, $offset, \max($max_length, 0))
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // UTF-16 / UTF-32 data is still converted, other binary data is returned untouched
        $is_convertible = !self::is_binary($data, true)
                          ||
                          self::is_utf16($data, false) !== false
                          ||
                          self::is_utf32($data, false) !== false;

        if ($is_convertible) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1845
1846
    /**
     * Checks if a file starts with BOM (Byte Order Mark) character.
     *
     * The whole file is read and the content is handed over to "string_has_bom()".
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1869
1870
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are processed recursively, strings are normalized and every
     * other type is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var                <p>The value that should be filtered.</p>
     * @param int                 $normalization_form <p>The form that is passed to the "Normalizer".</p>
     * @param string              $leading_combining  <p>Prefix for strings that start with a combining mark.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            // drop the reference, so that later writes can't touch the last element
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            if (!ASCII::is_ascii($var)) {
                // '-' is only a truthy marker for "the input was already normalized"
                $normalized = '-';

                if (!\Normalizer::isNormalized($var, $normalization_form)) {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    $var = ($normalized && isset($normalized[0]))
                        ? $normalized
                        : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $has_leading_combining_char = $normalized
                                              &&
                                              $var[0] >= "\x80"
                                              &&
                                              isset($normalized[0], $leading_combining[0])
                                              &&
                                              \preg_match('/^\\p{Mn}/u', $var);

                if ($has_leading_combining_char) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1947
1948
    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name and optionally filters it, the result
     * is passed through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW)); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      A null value is not forwarded to the native function.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $without_options = $options === null || \func_num_args() < 4;

        $value = $without_options
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($value);
    }
2003
2004
    /**
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables and optionally filters them, the result is passed
     * through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => 'FILTER_UNSAFE_RAW')); // array('bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int            $type       <p>
     *                                   One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                   <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                   <b>INPUT_ENV</b>.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a filter type,
     *                                   or an array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are filter which specifies the
     *                                   filter type, flags which specifies any flags that apply to the
     *                                   filter, and options which specifies any options that
     *                                   apply to the filter.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all
     *                                   values in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   A null value is not forwarded to the native function.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $without_definition = $definition === null || \func_num_args() < 2;

        $values = $without_definition
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        return self::filter($values);
    }
2065
2066
    /**
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter, the result is passed through "UTF8::filter()".
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        A null value (also if it is passed explicitly) is not forwarded
     *                                        to the native function.
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                        'options' => array(
     *                                        'default' => 3, // value to return if the filter fails
     *                                        // other options here
     *                                        'min_range' => 0
     *                                        ),
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                        // Expected format: Surname, GivenNames
     *                                        if (strpos($value, ", ") === false) return false;
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
     *                                        $empty = (empty($surname) || empty($givennames));
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                        if ($empty || $notstrings) {
     *                                        return false;
     *                                        } else {
     *                                        return $value;
     *                                        }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            // the native parameter is "array|int": never forward null (deprecated since PHP 8.1),
            // same guard as in "UTF8::filter_input()"
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2145
2146
    /**
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables and optionally filters them, the result is passed
     * through "UTF8::filter()".
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'user@example.com'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'user@example.com']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<mixed>   $data       <p>
     *                                   An array with string keys containing the data to filter.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a
     *                                   filter type, or an
     *                                   array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are filter
     *                                   which specifies the filter type,
     *                                   flags which specifies any flags that apply to the
     *                                   filter, and options which specifies any options that
     *                                   apply to the filter.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all values
     *                                   in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   A null value (also if it is passed explicitly) is not forwarded
     *                                   to the native function, so its default definition is used.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   Only forwarded together with a non-null $definition.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set.
     *               </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition === null || \func_num_args() < 2) {
            // the native parameter is "array|int": a forwarded null is rejected by the native
            // function, same guard as in "UTF8::filter_input_array()"
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        return self::filter($a);
    }
2216
2217
    /**
     * Tells whether the "finfo" class can be used on this server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the class exists, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function finfo_loaded(): bool
    {
        $is_available = \class_exists('finfo');

        return $is_available;
    }
2231
2232
    /**
     * Get the leading $n characters of a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the beginning.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The first $n characters, or an empty string if $str is empty or $n is not positive.</p>
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to cut out
        if ($n <= 0 || $str === '') {
            return '';
        }

        // the UTF-8 case goes straight to mbstring, everything else via the substr() of this class
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2258
2259
    /**
     * Check if the number of Unicode characters isn't greater than the specified integer.
     *
     * EXAMPLE: <code>
     * UTF8::fits_inside('test', 4); // true
     * UTF8::fits_inside('abcöäü', 2); // false
     * </code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        // the length is counted in characters (via the strlen() of this class), not in bytes
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2276
2277
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
     *
     * Use this function if you received a UTF-8 string that was converted from Windows-1252
     * as if it were ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement table is loaded via "getData('utf8_fix')" on first use and then
     * kept in static variables, so later calls only perform the str_replace().
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $SEARCH_CACHE = null;

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $REPLACE_CACHE = null;

        if ($SEARCH_CACHE === null) {
            // lazy-load the mapping table of this class
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            // keys are the broken sequences, values are their replacements
            $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
            $SEARCH_CACHE = \array_keys(self::$BROKEN_UTF8_FIX ?: []);
        }

        \assert(\is_array($REPLACE_CACHE));

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
2327
2328
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
     *
     * Arrays are processed element by element (recursively, keys are kept). A string is
     * passed through "utf8_decode()" + "to_utf8()" again and again until the result does
     * not change anymore.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>Will return the fixed input-"array" or
     *                         the fixed input-"string".</p>
     *
     * @template TFixUtf8
     * @phpstan-param TFixUtf8 $str
     * @phpstan-return TFixUtf8
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            foreach ($str as $key => $item) {
                $str[$key] = self::fix_utf8($item);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;

        // repeat the decode/encode round-trip until a fixed point is reached
        for ($previous = ''; $previous !== $current;) {
            $previous = $current;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2376
2377
    /**
     * Get the text direction of a specific character.
     *
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * If "IntlChar" is supported, its direction class decides first. If that class is
     * neither in the LTR nor in the RTL list below (or "IntlChar" is not available), the
     * code point is looked up in a hard-coded table of right-to-left code points.
     *
     * @param string $char
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            $direction_class = \IntlChar::charDirection($char);

            // direction classes from the "IntlChar"-Class
            if (\in_array($direction_class, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($direction_class, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }
        }

        $c = static::chr_to_decimal($char);

        // every code point outside of this window is handled as left-to-right
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // single right-to-left code points (compared strictly)
        $rtl_code_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtl_code_points, true)) {
            return 'RTL';
        }

        // inclusive right-to-left ranges: [first code point, last code point]
        $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            // the upper end is already limited by the window check above
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as list($range_start, $range_end)) {
            if ($c >= $range_start && $c <= $range_end) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2494
2495
    /**
     * Check for php-support.
     *
     * When a $key is given, the transliterator list is loaded (once) and also
     * stored in the support-"array" under "intl__transliterator_list_ids"
     * before the lookup is done.
     *
     * @param string|null $key
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        return self::$SUPPORT;
    }
2521
2522
    /**
     * Detect the file type from the first two bytes of the given content.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // the signature check needs at least two bytes
        if (!isset($str[1])) {
            return $fallback;
        }

        // the decimal values of the first two bytes, glued together (e.g. 0xFF 0xD8 => 255216)
        $type_code = (int) (\ord($str[0]) . \ord($str[1]));

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        if (isset($known_types[$type_code])) {
            return $known_types[$type_code];
        }

        return $fallback;
    }
2598
2599
    /**
     * Build a random string out of the given characters.
     *
     * Every character is picked via "random_int()"; "mt_rand()" is only used as
     * fallback if "random_int()" throws an exception.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The random string, or an empty string if $possible_chars is empty.</p>
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // UTF-8 is handled directly via mbstring, other encodings via the helpers of this class
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        if ($pool_size === 0) {
            return '';
        }

        $last_index = $pool_size - 1;
        $random_string = '';

        //
        // add random chars
        //

        for ($picked = 0; $picked < $length;) {
            try {
                $position = \random_int(0, $last_index);
            } catch (\Exception $e) {
                $position = \mt_rand(0, $last_index);
            }

            $char = $is_utf8
                ? \mb_substr($possible_chars, $position, 1)
                : self::substr($possible_chars, $position, 1, $encoding);

            // only count the round if a character was really extracted
            if ($char !== false) {
                $random_string .= $char;
                ++$picked;
            }
        }

        return $random_string;
    }
2661
2662
    /**
     * Create a unique string out of a random number, the session-id, the remote- and
     * server-address (if available), the given extra entropy and "uniqid()".
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        $upper_limit = \mt_getrandmax();

        try {
            $random_number = \random_int(0, $upper_limit);
        } catch (\Exception $e) {
            // fallback if no source of randomness is available for "random_int()"
            $random_number = \mt_rand(0, $upper_limit);
        }

        $remote_address = $_SERVER['REMOTE_ADDR'] ?? '';
        $server_address = $_SERVER['SERVER_ADDR'] ?? '';

        $seed = $random_number . \session_id() . $remote_address . $server_address . $extra_entropy;

        $unique_id = \uniqid($seed, true);

        if (!$use_md5) {
            return $unique_id;
        }

        return \md5($unique_id . $seed);
    }
2690
2691
    /**
     * Check whether at least one lower case character is part of the string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        // without mbstring the pattern is checked via the helper of this class
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match('.*[[:lower:]]', $str)
            : self::str_matches_pattern($str, '.*[[:lower:]]');
    }
2709
2710
    /**
     * Check whether at least one whitespace character is part of the string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        // without mbstring the pattern is checked via the helper of this class
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match('.*[[:space:]]', $str)
            : self::str_matches_pattern($str, '.*[[:space:]]');
    }
2728
2729
    /**
     * Check whether at least one upper case character is part of the string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        // without mbstring the pattern is checked via the helper of this class
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match('.*[[:upper:]]', $str)
            : self::str_matches_pattern($str, '.*[[:upper:]]');
    }
2747
2748
    /**
     * Converts a hexadecimal value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * Every character that is not a hexadecimal digit (e.g. the "U+" prefix) is ignored.
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // "hexdec()" skips non-hex characters anyway, but it reports them with a deprecation
        // notice since PHP 7.4. Removing them up front gives the same number without the
        // need for the silence operator.
        $hex_digits = (string) \preg_replace('/[^0-9a-fA-F]/', '', $hexdec);

        return self::decimal_to_chr((int) \hexdec($hex_digits));
    }
2766
2767
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * Accepted are 4 to 6 hexadecimal digits, optionally prefixed by "U+" or "\u".
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure.</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hexadecimal digits are allowed. A broader class such as [a-zA-Z0-9]
        // would let values like "zzzz" through, which intval() then silently turns into 0
        // (or into a truncated number) instead of reporting a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([0-9a-fA-F]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2796
2797
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * With mbstring the work is done by "mb_encode_numericentity()", otherwise every
     * character is encoded one by one via "single_chr_html_encode()".
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // start at 0x80 to leave the ASCII range untouched
            $convmap = [$keep_ascii_chars ? 0x80 : 0x00, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
                $encoded = \mb_encode_numericentity($str, $convmap);
                if ($encoded !== null && $encoded !== false) {
                    return $encoded;
                }
            }

            /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
            $encoded = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($encoded !== null && $encoded !== false) {
                return $encoded;
            }
        }

        //
        // fallback via vanilla php
        //

        $encode_single_char = static function (string $chr) use ($keep_ascii_chars, $encoding): string {
            return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
        };

        return \implode('', \array_map($encode_single_char, self::str_split($str)));
    }
2869
2870
    /**
2871
     * UTF-8 version of html_entity_decode()
2872
     *
2873
     * The reason we are not using html_entity_decode() by itself is because
2874
     * while it is not technically correct to leave out the semicolon
2875
     * at the end of an entity most browsers will still interpret the entity
2876
     * correctly. html_entity_decode() does not convert entities without
2877
     * semicolons, so we are left with our own little solution here. Bummer.
2878
     *
2879
     * Convert all HTML entities to their applicable characters.
2880
     *
2881
     * INFO: opposite to UTF8::html_encode()
2882
     *
2883
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2884
     *
2885
     * @see http://php.net/manual/en/function.html-entity-decode.php
2886
     *
2887
     * @param string   $str      <p>
2888
     *                           The input string.
2889
     *                           </p>
2890
     * @param int|null $flags    [optional] <p>
2891
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2892
     *                           and which document type to use. If null is passed, ENT_QUOTES | ENT_HTML5 is used.
2893
     *                           <table>
2894
     *                           Available <i>flags</i> constants
2895
     *                           <tr valign="top">
2896
     *                           <td>Constant Name</td>
2897
     *                           <td>Description</td>
2898
     *                           </tr>
2899
     *                           <tr valign="top">
2900
     *                           <td><b>ENT_COMPAT</b></td>
2901
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2902
     *                           </tr>
2903
     *                           <tr valign="top">
2904
     *                           <td><b>ENT_QUOTES</b></td>
2905
     *                           <td>Will convert both double and single quotes.</td>
2906
     *                           </tr>
2907
     *                           <tr valign="top">
2908
     *                           <td><b>ENT_NOQUOTES</b></td>
2909
     *                           <td>Will leave both double and single quotes unconverted.</td>
2910
     *                           </tr>
2911
     *                           <tr valign="top">
2912
     *                           <td><b>ENT_HTML401</b></td>
2913
     *                           <td>
2914
     *                           Handle code as HTML 4.01.
2915
     *                           </td>
2916
     *                           </tr>
2917
     *                           <tr valign="top">
2918
     *                           <td><b>ENT_XML1</b></td>
2919
     *                           <td>
2920
     *                           Handle code as XML 1.
2921
     *                           </td>
2922
     *                           </tr>
2923
     *                           <tr valign="top">
2924
     *                           <td><b>ENT_XHTML</b></td>
2925
     *                           <td>
2926
     *                           Handle code as XHTML.
2927
     *                           </td>
2928
     *                           </tr>
2929
     *                           <tr valign="top">
2930
     *                           <td><b>ENT_HTML5</b></td>
2931
     *                           <td>
2932
     *                           Handle code as HTML 5.
2933
     *                           </td>
2934
     *                           </tr>
2935
     *                           </table>
2936
     *                           </p>
2937
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2938
     *
2939
     * @psalm-pure
2940
     *
2941
     * @return string the decoded string
2942
     */
2943
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to decode: too short for an entity (examples: &; || &x;) or no "&" at all
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $flags = $flags ?? (\ENT_QUOTES | \ENT_HTML5);

        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // decode again and again (e.g. for "&amp;lt;") until nothing changes anymore
        while (\strpos($str, '&') !== false) {
            $before = $str;

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities:
                // add the missing semicolon so that "html_entity_decode()" accepts them
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);

            if ($before === $str) {
                break;
            }
        }

        return $str;
    }
3002
3003
    /**
     * Escape a string for html via "UTF8::htmlspecialchars()"
     * (with the flags ENT_QUOTES | ENT_SUBSTITUTE).
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $escape_flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escape_flags, $encoding);
    }
3021
3022
    /**
3023
     * Remove empty html-tag.
3024
     *
3025
     * e.g.: <pre><tag></tag></pre>
3026
     *
3027
     * @param string $str
3028
     *
3029
     * @psalm-pure
3030
     *
3031
     * @return string
3032
     */
3033
    public static function html_stripe_empty_tags(string $str): string
    {
        // An opening tag (no "/" inside), optional whitespace, then a closing tag.
        $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $without_empty_tags = \preg_replace($empty_tag_pattern, '', $str);

        // preg_replace() gives null on failure; the cast turns that into ''.
        return (string) $without_empty_tags;
    }
3041
3042
    /**
3043
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3044
     *
3045
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3046
     *
3047
     * @see http://php.net/manual/en/function.htmlentities.php
3048
     *
3049
     * @param string $str           <p>
3050
     *                              The input string.
3051
     *                              </p>
3052
     * @param int    $flags         [optional] <p>
3053
     *                              A bitmask of one or more of the following flags, which specify how to handle
3054
     *                              quotes, invalid code unit sequences and the used document type. The default is
3055
     *                              ENT_COMPAT | ENT_HTML401.
3056
     *                              <table>
3057
     *                              Available <i>flags</i> constants
3058
     *                              <tr valign="top">
3059
     *                              <td>Constant Name</td>
3060
     *                              <td>Description</td>
3061
     *                              </tr>
3062
     *                              <tr valign="top">
3063
     *                              <td><b>ENT_COMPAT</b></td>
3064
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3065
     *                              </tr>
3066
     *                              <tr valign="top">
3067
     *                              <td><b>ENT_QUOTES</b></td>
3068
     *                              <td>Will convert both double and single quotes.</td>
3069
     *                              </tr>
3070
     *                              <tr valign="top">
3071
     *                              <td><b>ENT_NOQUOTES</b></td>
3072
     *                              <td>Will leave both double and single quotes unconverted.</td>
3073
     *                              </tr>
3074
     *                              <tr valign="top">
3075
     *                              <td><b>ENT_IGNORE</b></td>
3076
     *                              <td>
3077
     *                              Silently discard invalid code unit sequences instead of returning
3078
     *                              an empty string. Using this flag is discouraged as it
3079
     *                              may have security implications.
3080
     *                              </td>
3081
     *                              </tr>
3082
     *                              <tr valign="top">
3083
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3084
     *                              <td>
3085
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3086
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3087
     *                              string.
3088
     *                              </td>
3089
     *                              </tr>
3090
     *                              <tr valign="top">
3091
     *                              <td><b>ENT_DISALLOWED</b></td>
3092
     *                              <td>
3093
     *                              Replace invalid code points for the given document type with a
3094
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3095
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3096
     *                              instance, to ensure the well-formedness of XML documents with
3097
     *                              embedded external content.
3098
     *                              </td>
3099
     *                              </tr>
3100
     *                              <tr valign="top">
3101
     *                              <td><b>ENT_HTML401</b></td>
3102
     *                              <td>
3103
     *                              Handle code as HTML 4.01.
3104
     *                              </td>
3105
     *                              </tr>
3106
     *                              <tr valign="top">
3107
     *                              <td><b>ENT_XML1</b></td>
3108
     *                              <td>
3109
     *                              Handle code as XML 1.
3110
     *                              </td>
3111
     *                              </tr>
3112
     *                              <tr valign="top">
3113
     *                              <td><b>ENT_XHTML</b></td>
3114
     *                              <td>
3115
     *                              Handle code as XHTML.
3116
     *                              </td>
3117
     *                              </tr>
3118
     *                              <tr valign="top">
3119
     *                              <td><b>ENT_HTML5</b></td>
3120
     *                              <td>
3121
     *                              Handle code as HTML 5.
3122
     *                              </td>
3123
     *                              </tr>
3124
     *                              </table>
3125
     *                              </p>
3126
     * @param string $encoding      [optional] <p>
3127
     *                              Like <b>htmlspecialchars</b>,
3128
     *                              <b>htmlentities</b> takes an optional third argument
3129
     *                              <i>encoding</i> which defines encoding used in
3130
     *                              conversion.
3131
     *                              Although this argument is technically optional, you are highly
3132
     *                              encouraged to specify the correct value for your code.
3133
     *                              </p>
3134
     * @param bool   $double_encode [optional] <p>
3135
     *                              When <i>double_encode</i> is turned off PHP will not
3136
     *                              encode existing html entities. The default is to convert everything.
3137
     *                              </p>
3138
     *
3139
     * @psalm-pure
3140
     *
3141
     * @return string
3142
     *                <p>
3143
     *                The encoded string.
3144
     *                <br><br>
3145
     *                If the input <i>string</i> contains an invalid code unit
3146
     *                sequence within the given <i>encoding</i> an empty string
3147
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3148
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3149
     *                </p>
3150
     */
3151
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; every other name is normalized first.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * PHP does not convert a backslash into its html entity, because backslashes
         * are mostly used to escape characters when inserting into a database. That
         * is not needed with a proper database layer, so every backslash is replaced
         * by its numeric html entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
3180
3181
    /**
3182
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3183
     *
3184
     * INFO: Take a look at "UTF8::htmlentities()"
3185
     *
3186
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3187
     *
3188
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3189
     *
3190
     * @param string $str           <p>
3191
     *                              The string being converted.
3192
     *                              </p>
3193
     * @param int    $flags         [optional] <p>
3194
     *                              A bitmask of one or more of the following flags, which specify how to handle
3195
     *                              quotes, invalid code unit sequences and the used document type. The default is
3196
     *                              ENT_COMPAT | ENT_HTML401.
3197
     *                              <table>
3198
     *                              Available <i>flags</i> constants
3199
     *                              <tr valign="top">
3200
     *                              <td>Constant Name</td>
3201
     *                              <td>Description</td>
3202
     *                              </tr>
3203
     *                              <tr valign="top">
3204
     *                              <td><b>ENT_COMPAT</b></td>
3205
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3206
     *                              </tr>
3207
     *                              <tr valign="top">
3208
     *                              <td><b>ENT_QUOTES</b></td>
3209
     *                              <td>Will convert both double and single quotes.</td>
3210
     *                              </tr>
3211
     *                              <tr valign="top">
3212
     *                              <td><b>ENT_NOQUOTES</b></td>
3213
     *                              <td>Will leave both double and single quotes unconverted.</td>
3214
     *                              </tr>
3215
     *                              <tr valign="top">
3216
     *                              <td><b>ENT_IGNORE</b></td>
3217
     *                              <td>
3218
     *                              Silently discard invalid code unit sequences instead of returning
3219
     *                              an empty string. Using this flag is discouraged as it
3220
     *                              may have security implications.
3221
     *                              </td>
3222
     *                              </tr>
3223
     *                              <tr valign="top">
3224
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3225
     *                              <td>
3226
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3227
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3228
     *                              string.
3229
     *                              </td>
3230
     *                              </tr>
3231
     *                              <tr valign="top">
3232
     *                              <td><b>ENT_DISALLOWED</b></td>
3233
     *                              <td>
3234
     *                              Replace invalid code points for the given document type with a
3235
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3236
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3237
     *                              instance, to ensure the well-formedness of XML documents with
3238
     *                              embedded external content.
3239
     *                              </td>
3240
     *                              </tr>
3241
     *                              <tr valign="top">
3242
     *                              <td><b>ENT_HTML401</b></td>
3243
     *                              <td>
3244
     *                              Handle code as HTML 4.01.
3245
     *                              </td>
3246
     *                              </tr>
3247
     *                              <tr valign="top">
3248
     *                              <td><b>ENT_XML1</b></td>
3249
     *                              <td>
3250
     *                              Handle code as XML 1.
3251
     *                              </td>
3252
     *                              </tr>
3253
     *                              <tr valign="top">
3254
     *                              <td><b>ENT_XHTML</b></td>
3255
     *                              <td>
3256
     *                              Handle code as XHTML.
3257
     *                              </td>
3258
     *                              </tr>
3259
     *                              <tr valign="top">
3260
     *                              <td><b>ENT_HTML5</b></td>
3261
     *                              <td>
3262
     *                              Handle code as HTML 5.
3263
     *                              </td>
3264
     *                              </tr>
3265
     *                              </table>
3266
     *                              </p>
3267
     * @param string $encoding      [optional] <p>
3268
     *                              Defines encoding used in conversion.
3269
     *                              </p>
3270
     *                              <p>
3271
     *                              For the purposes of this function, the encodings
3272
     *                              ISO-8859-1, ISO-8859-15,
3273
     *                              UTF-8, cp866,
3274
     *                              cp1251, cp1252, and
3275
     *                              KOI8-R are effectively equivalent, provided the
3276
     *                              <i>string</i> itself is valid for the encoding, as
3277
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3278
     *                              the same positions in all of these encodings.
3279
     *                              </p>
3280
     * @param bool   $double_encode [optional] <p>
3281
     *                              When <i>double_encode</i> is turned off PHP will not
3282
     *                              encode existing html entities, the default is to convert everything.
3283
     *                              </p>
3284
     *
3285
     * @psalm-pure
3286
     *
3287
     * @return string the converted string.
3288
     *                </p>
3289
     *                <p>
3290
     *                If the input <i>string</i> contains an invalid code unit
3291
     *                sequence within the given <i>encoding</i> an empty string
3292
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3293
     *                <b>ENT_SUBSTITUTE</b> flags are set
3294
     */
3295
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // Only encodings other than "UTF-8" and "CP850" go through normalization.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
3312
3313
    /**
3314
     * Checks whether iconv is available on the server.
3315
     *
3316
     * @psalm-pure
3317
     *
3318
     * @return bool
3319
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3320
     *
3321
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3322
     */
3323
    public static function iconv_loaded(): bool
    {
        // Ask the engine whether the "iconv" extension is loaded.
        $has_iconv = \extension_loaded('iconv');

        return $has_iconv;
    }
3327
3328
    /**
3329
     * Converts Integer to hexadecimal U+xxxx code point representation.
3330
     *
3331
     * INFO: opposite to UTF8::hex_to_int()
3332
     *
3333
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3334
     *
3335
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3336
     * @param string $prefix [optional]
3337
     *
3338
     * @psalm-pure
3339
     *
3340
     * @return string the code point, or empty string on failure
3341
     */
3342
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        $hex_digits = \dechex($int);

        // Left-pad short values with zeros up to four digits; longer values stay as they are.
        if (\strlen($hex_digits) < 4) {
            $hex_digits = \substr('0000' . $hex_digits, -4);
        }

        return $prefix . $hex_digits;
    }
3350
3351
    /**
3352
     * Checks whether intl-char is available on the server.
3353
     *
3354
     * @psalm-pure
3355
     *
3356
     * @return bool
3357
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3358
     *
3359
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3360
     */
3361
    public static function intlChar_loaded(): bool
    {
        // The feature counts as available when the "IntlChar" class exists.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
3365
3366
    /**
3367
     * Checks whether intl is available on the server.
3368
     *
3369
     * @psalm-pure
3370
     *
3371
     * @return bool
3372
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3373
     *
3374
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3375
     */
3376
    public static function intl_loaded(): bool
    {
        // Ask the engine whether the "intl" extension is loaded.
        $has_intl = \extension_loaded('intl');

        return $has_intl;
    }
3380
3381
    /**
3382
     * Returns true if the string contains only alphabetic chars, false otherwise.
3383
     *
3384
     * @param string $str <p>The input string.</p>
3385
     *
3386
     * @psalm-pure
3387
     *
3388
     * @return bool
3389
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3390
     */
3391
    public static function is_alpha(string $str): bool
    {
        $alpha_only = '^[[:alpha:]]*$';

        // Without mbstring support, use the generic pattern matcher instead.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $alpha_only);
        }

        return \mb_ereg_match($alpha_only, $str);
    }
3399
3400
    /**
3401
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3402
     *
3403
     * @param string $str <p>The input string.</p>
3404
     *
3405
     * @psalm-pure
3406
     *
3407
     * @return bool
3408
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3409
     */
3410
    public static function is_alphanumeric(string $str): bool
    {
        $alnum_only = '^[[:alnum:]]*$';

        // Use mb_ereg_match() when mbstring is supported, the generic matcher otherwise.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($alnum_only, $str)
            : self::str_matches_pattern($str, $alnum_only);
    }
3418
3419
    /**
3420
     * Returns true if the string contains only punctuation chars, false otherwise.
3421
     *
3422
     * @param string $str <p>The input string.</p>
3423
     *
3424
     * @psalm-pure
3425
     *
3426
     * @return bool
3427
     *              <p>Whether or not $str contains only punctuation chars.</p>
3428
     */
3429
    public static function is_punctuation(string $str): bool
    {
        // The whole string must consist of [[:punct:]] characters (or be empty).
        $punct_only = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $punct_only);
    }
3433
3434
    /**
3435
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3436
     *
3437
     * @param string $str                       <p>The input string.</p>
3438
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
3439
     *
3440
     * @psalm-pure
3441
     *
3442
     * @return bool
3443
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3444
     */
3445
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // The string is printable when removing invisible characters leaves it unchanged.
        $visible_only = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $visible_only === $str;
    }
3449
3450
    /**
3451
     * Checks if a string is 7 bit ASCII.
3452
     *
3453
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3454
     *
3455
     * @param string $str <p>The string to check.</p>
3456
     *
3457
     * @psalm-pure
3458
     *
3459
     * @return bool
3460
     *              <p>
3461
     *              <strong>true</strong> if it is ASCII<br>
3462
     *              <strong>false</strong> otherwise
3463
     *              </p>
3464
     */
3465
    public static function is_ascii(string $str): bool
    {
        // The check itself is delegated to the ASCII helper class.
        $only_ascii = ASCII::is_ascii($str);

        return $only_ascii;
    }
3469
3470
    /**
3471
     * Returns true if the string is base64 encoded, false otherwise.
3472
     *
3473
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3474
     *
3475
     * @param string|null $str                   <p>The input string.</p>
3476
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3477
     *
3478
     * @psalm-pure
3479
     *
3480
     * @return bool
3481
     *              <p>Whether or not $str is base64 encoded.</p>
3482
     */
3483
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // Anything that is not a string (e.g. null) is never base64.
        if (!\is_string($str)) {
            return false;
        }

        // An empty string only counts when the caller explicitly allows it.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // Decode in strict mode ...
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // ... and require that re-encoding reproduces the input exactly.
        return \base64_encode($decoded) === $str;
    }
3501
3502
    /**
3503
     * Check if the input is binary... (it looks like a hack).
3504
     *
3505
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3506
     *
3507
     * @param int|string $input
3508
     * @param bool       $strict
3509
     *
3510
     * @psalm-pure
3511
     *
3512
     * @return bool
3513
     */
3514
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // A string made up solely of "0" and "1" counts as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        if ($strict) {
            // Strict mode relies on ext-fileinfo to sniff the buffer.
            if (self::$SUPPORT['finfo'] === false) {
                throw new \RuntimeException('ext-fileinfo: is not installed');
            }

            /**
             * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
             */
            $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

            return $detected_encoding === 'binary';
        }

        // Non-strict mode: binary when more than a quarter of all bytes are NUL bytes.
        $byte_count = \strlen($input);
        $null_byte_count = \substr_count($input, "\x0", 0, $byte_count);

        return ($null_byte_count / $byte_count) > 0.25;
    }
3554
3555
    /**
3556
     * Check if the file is binary.
3557
     *
3558
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3559
     *
3560
     * @param string $file
3561
     *
3562
     * @return bool
3563
     */
3564
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');

        // Nothing could be opened, so there is nothing to inspect.
        if (!\is_resource($handle)) {
            return false;
        }

        // Only the first 512 bytes of the file are inspected.
        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === false || $head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
3581
3582
    /**
3583
     * Returns true if the string contains only whitespace chars, false otherwise.
3584
     *
3585
     * @param string $str <p>The input string.</p>
3586
     *
3587
     * @psalm-pure
3588
     *
3589
     * @return bool
3590
     *              <p>Whether or not $str contains only whitespace characters.</p>
3591
     */
3592
    public static function is_blank(string $str): bool
    {
        $space_only = '^[[:space:]]*$';

        // Without mbstring support, use the generic pattern matcher instead.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $space_only);
        }

        return \mb_ereg_match($space_only, $str);
    }
3600
3601
    /**
3602
     * Checks if the given string is equal to any "Byte Order Mark".
3603
     *
3604
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3605
     *
3606
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3607
     *
3608
     * @param string $str <p>The input string.</p>
3609
     *
3610
     * @psalm-pure
3611
     *
3612
     * @return bool
3613
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3614
     */
3615
    public static function is_bom($str): bool
    {
        // The keys of self::$BOM are compared strictly against the input; the
        // values are not needed here. Iterate by value: a by-reference loop
        // would take references into the shared static table and leave one
        // dangling after the loop, although nothing is modified.
        /** @noinspection PhpUnusedLocalVariableInspection */
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if ($str === $bom_string) {
                return true;
            }
        }

        return false;
    }
3626
3627
    /**
3628
     * Determine whether the string is considered to be empty.
3629
     *
3630
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3631
     * empty() does not generate a warning if the variable does not exist.
3632
     *
3633
     * @param array|float|int|string $str
3634
     *
3635
     * @psalm-pure
3636
     *
3637
     * @return bool
3638
     *              <p>Whether or not $str is empty().</p>
3639
     */
3640
    public static function is_empty($str): bool
    {
        // Plain empty() semantics: '', '0', 0, 0.0, [], null and false are all empty.
        $is_empty = empty($str);

        return $is_empty;
    }
3644
3645
    /**
3646
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3647
     *
3648
     * @param string $str <p>The input string.</p>
3649
     *
3650
     * @psalm-pure
3651
     *
3652
     * @return bool
3653
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3654
     */
3655
    public static function is_hexadecimal(string $str): bool
    {
        $xdigit_only = '^[[:xdigit:]]*$';

        // Use mb_ereg_match() when mbstring is supported, the generic matcher otherwise.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($xdigit_only, $str)
            : self::str_matches_pattern($str, $xdigit_only);
    }
3663
3664
    /**
3665
     * Check if the string contains any HTML tags.
3666
     *
3667
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3668
     *
3669
     * @param string $str <p>The input string.</p>
3670
     *
3671
     * @psalm-pure
3672
     *
3673
     * @return bool
3674
     *              <p>Whether or not $str contains html elements.</p>
3675
     */
3676
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        // Opening or closing tag, optionally with attributes, optionally self-closed.
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        // One match is enough to say that the string contains HTML.
        return \preg_match($tag_pattern, $encoded) === 1;
    }
3691
3692
    /**
3693
     * Check if $url is a correct URL.
3694
     *
3695
     * @param string $url
3696
     * @param bool   $disallow_localhost
3697
     *
3698
     * @psalm-pure
3699
     *
3700
     * @return bool
3701
     */
3702
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            $loopback_prefixes = [
                'http://localhost',
                'https://localhost',
                'http://127.0.0.1',
                'https://127.0.0.1',
                'http://::1',
                'https://::1',
                // An IPv6 literal has to be written in brackets inside a URL,
                // so the bracketed loopback form must be rejected as well.
                'http://[::1]',
                'https://[::1]',
            ];

            if (self::str_istarts_with_any($url, $loopback_prefixes)) {
                return false;
            }

            // Reject any host that contains ".localhost", e.g. "foo.localhost".
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Everything else is left to PHP's own URL validation.
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3743
3744
    /**
3745
     * Try to check if "$str" is a JSON-string.
3746
     *
3747
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
3748
     *
3749
     * @param string $str                                    <p>The input string.</p>
3750
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
3751
     *                                                       results.</p>
3752
     *
3753
     * @return bool
3754
     *              <p>Whether or not the $str is in JSON format.</p>
3755
     */
3756
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only accepted when the input itself spells "null" (in any case).
        $input_is_null_literal = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$input_is_null_literal) {
            return false;
        }

        if ($only_array_or_object_results_are_valid) {
            // Scalars and null are rejected in this mode.
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        // Finally, the decoder must not have reported an error.
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3783
3784
    /**
     * Returns true if the string contains only lower case chars, false otherwise.
     *
     * The check is done with the pattern "^[[:lower:]]*$"; because of the "*"
     * quantifier the pattern itself also accepts an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // use the mbstring regex engine when it is flagged as available, otherwise the internal pattern helper
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3800
3801
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is probed with unserialize(); class instantiation is disabled
     * for that probe, so objects inside the payload are never created (and
     * none of their magic methods are executed) just to answer the question.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is a serialized FALSE: unserialize() returns false for it, which
        // cannot be told apart from a failure, so it is accepted up front
        if ($str === 'b:0;') {
            return true;
        }

        // only the format matters here, so no class may be instantiated:
        // an empty options array would keep the default "allowed_classes => true"
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3823
3824
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * The check is done with the pattern "^[[:upper:]]*$"; because of the "*"
     * quantifier the pattern itself also accepts an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // use the mbstring regex engine when it is flagged as available, otherwise the internal pattern helper
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3843
3844
    /**
     * Check if the string is UTF-16.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>When true and the string has no BOM, the string must
     *                                          pass the binary check, otherwise <strong>false</strong> is
     *                                          returned right away.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check: a string with a BOM skips the binary test
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // score both byte orders with the same round-trip test (little endian first, then big endian)
        $hits = [];
        foreach (['UTF-16LE', 'UTF-16BE'] as $candidate_encoding) {
            $hits[$candidate_encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $candidate_encoding);
            if (!$test) {
                continue;
            }

            // the candidate only counts if converting back and forth is lossless
            $test2 = \mb_convert_encoding($test, $candidate_encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate_encoding);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and shared between both candidates
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys of the char-count result are needed, so no by-reference iteration over a temporary
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$hits[$candidate_encoding];
                }
            }
        }

        // a tie (including "no hits at all") means the byte order cannot be decided
        if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
            return false;
        }

        return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
    }
3942
3943
    /**
     * Check if the string is UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>When true and the string has no BOM, the string must
     *                                          pass the binary check, otherwise <strong>false</strong> is
     *                                          returned right away.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check: a string with a BOM skips the binary test
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // score both byte orders with the same round-trip test (little endian first, then big endian)
        $hits = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate_encoding) {
            $hits[$candidate_encoding] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $candidate_encoding);
            if (!$test) {
                continue;
            }

            // the candidate only counts if converting back and forth is lossless
            $test2 = \mb_convert_encoding($test, $candidate_encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate_encoding);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and shared between both candidates
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys of the char-count result are needed, so no by-reference iteration over a temporary
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$hits[$candidate_encoding];
                }
            }
        }

        // a tie (including "no hits at all") means the byte order cannot be decided
        if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
            return false;
        }

        return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
    }
4041
4042
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * An array is checked element by element (recursively); the first element
     * that fails the check makes the whole call return false.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // the elements are only read, so they are iterated by value (no dangling reference on early return)
            foreach ($str as $value) {
                if (!self::is_utf8($value, $strict)) {
                    return false;
                }
            }

            return true;
        }

        // non-array input is cast to a string before the actual check
        return self::is_utf8_string((string) $str, $strict);
    }
4072
4073
    /**
     * Decodes a JSON string.
     *
     * The input is passed through the internal filter() helper first and is
     * then handed to the native json_decode().
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth, values below 1 are raised to 1.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, passed on unchanged to the native function.
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the JSON support flag reports that ext-json is missing
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in appropriate PHP type.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // never hand a depth below 1 to the native function
        $safe_depth = \max(1, $depth);

        return \json_decode($filtered_json, $assoc, $safe_depth, $options);
    }
4130
4131
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through the internal filter() helper first and is
     * then handed to the native json_encode().
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth, values below 1 are raised to 1.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the JSON support flag reports that ext-json is missing
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // never hand a depth below 1 to the native function
        $safe_depth = \max(1, $depth);

        return \json_encode($filtered_value, $options, $safe_depth);
    }
4187
4188
    /**
     * Checks whether JSON is available on the server.
     *
     * The check is based on the existence of the json_decode() function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
4202
4203
    /**
     * Makes string's first char lowercase.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            // fast path: without a language or length constraint the plain mb-function is used
            if ($lang === null && !$try_to_keep_the_string_length) {
                return \mb_strtolower($first_char) . $remainder;
            }
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $first_char = (string) self::substr($str, 0, 1, $encoding);
            $remainder = (string) self::substr($str, 1, null, $encoding);
        }

        // every other combination goes through the library's own strtolower()
        $lowered_first_char = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $lowered_first_char . $remainder;
    }
4265
4266
    /**
     * Lowercase the first character of all words in the string.
     *
     * The string is split via str_to_words(); every non-empty part that is not
     * listed in $exceptions is passed through lcfirst(), all parts are then
     * concatenated again in their original order.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // compare against '' explicitly: a truthiness check would also discard the string "0"
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            $word = (string) $word;

            // skip empty parts only, a word like "0" must stay in the result
            if ($word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                // excluded words are copied unchanged
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }
4319
4320
    /**
     * Levenshtein function with UTF-8 support.
     *
     * Both strings are run through convertMbAscii() with one shared char map
     * and the native \levenshtein() is then applied to the results.
     *
     * For better performance, in a real application with a single input string
     * matched against many strings from a database, you will probably want to pre-
     * encode the input only once and use \levenshtein().
     *
     * Source: https://github.com/KEINOS/mb_levenshtein
     *
     * @see https://www.php.net/manual/en/function.levenshtein
     *
     * @param string $str1            <p>One of the strings being evaluated for Levenshtein distance.</p>
     * @param string $str2            <p>One of the strings being evaluated for Levenshtein distance.</p>
     * @param int    $insertionCost   [optional] <p>Defines the cost of insertion.</p>
     * @param int    $replacementCost [optional] <p>Defines the cost of replacement.</p>
     * @param int    $deletionCost    [optional] <p>Defines the cost of deletion.</p>
     *
     * @return int
     */
    public static function levenshtein(string $str1, string $str2, int $insertionCost = 1, int $replacementCost = 1, int $deletionCost = 1)
    {
        // one map for both strings; the helper's return value is not used, so it presumably
        // rewrites the string (and fills the map) by reference - NOTE(review): confirm against convertMbAscii()
        $shared_char_map = [];

        self::convertMbAscii($str1, $shared_char_map);
        self::convertMbAscii($str2, $shared_char_map);

        return \levenshtein(
            $str1,
            $str2,
            $insertionCost,
            $replacementCost,
            $deletionCost
        );
    }
4346
4347
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped, <strong>null</strong> strips whitespace;
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // an empty char list would build the invalid character class "^[]+",
        // so it is treated as "nothing to strip" (like the native ltrim() does)
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // no delimiter is passed here, the pattern is used by the mbstring regex engine
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }
4386
4387
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings
     *                                  (an array is joined into one string first).</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $str = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($str);

        // no code points at all: there is nothing to pick a maximum from
        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4413
4414
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte lengths of the given chars, 0 if the size list is empty.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return $sizes === [] ? 0 : (int) \max($sizes);
    }
4436
4437
    /**
     * Checks whether mbstring is available on the server.
     *
     * The check is based on the "mbstring" extension being loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4451 2
4452
    /**
4453
     * Returns the UTF-8 character with the minimum code point in the given data.
4454
     *
4455
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
4456
     *
4457
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
4458
     *
4459
     * @psalm-pure
4460
     *
4461
     * @return string|null
4462
     *                     <p>The character with the lowest code point, returns null on failure or empty input.</p>
4463
     */
4464
    public static function min($arg)
    {
        // Flatten an array of strings into a single string first.
        $subject = $arg;
        if (\is_array($subject)) {
            $subject = \implode('', $subject);
        }

        $all_code_points = self::codepoints($subject);
        if ($all_code_points !== []) {
            // Convert the smallest code point back into its character.
            return self::chr((int) \min($all_code_points));
        }

        return null;
    }
4479
4480
    /**
4481 339
     * Normalize the encoding-"name" input.
4482
     *
4483 339
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
4484 290
     *
4485
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
4486
     * @param mixed $fallback <p>e.g.: UTF-8</p>
4487
     *
4488 53
     * @psalm-pure
4489
     *
4490 53
     * @return mixed|string
4491
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)</p>
4492 29
     *
4493
     * @template TNormalizeEncodingFallback
4494
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
4495
     * @phpstan-return string|TNormalizeEncodingFallback
4496 44
     */
4497
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * Memo of already resolved names: raw input name => normalized name.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // '' and '0' are the only falsy strings; '1' is what a "true" becomes
        // after the string cast (non "strict_types" usage).
        if ($encoding === '' || $encoding === '0' || $encoding === '1') {
            return $fallback;
        }

        // Fast paths for the most common spellings, checked before the cache.
        if ($encoding === 'UTF-8' || $encoding === 'UTF8') {
            return 'UTF-8';
        }

        if ($encoding === '8BIT' || $encoding === 'BINARY') {
            return 'CP850';
        }

        if ($encoding === 'HTML' || $encoding === 'HTML-ENTITIES') {
            return 'HTML-ENTITIES';
        }

        if ($encoding === 'ISO' || $encoding === 'ISO-8859-1') {
            return 'ISO-8859-1';
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // A name that is already in the list of known encodings is returned as is.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $encoding_original = $encoding;
        $encoding = \strtoupper($encoding);

        // Lookup key: only ASCII letters and digits are kept. The character class is
        // pure ASCII, so no "u" modifier is used here; with it, preg_replace() returns
        // null for malformed UTF-8 input and the alias lookup below would never match.
        $encoding_upper_helper = (string) \preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Known alias => canonical name; anything else keeps its upper-cased form.
        if (isset($equivalences[$encoding_upper_helper])) {
            $encoding = $equivalences[$encoding_upper_helper];
        }

        // Cached under the caller's original spelling so the next call returns early.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding_original] = $encoding;

        return $encoding;
    }
4645
4646
    /**
4647
     * Standardize line ending to unix-like.
4648
     *
4649 10
     * @param string          $str      <p>The input string.</p>
4650
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
4651 10
     *                                  here.</p>
4652
     *
4653
     * @psalm-pure
4654
     *
4655
     * @return string
4656
     *                <p>A string with normalized line ending.</p>
4657
     */
4658
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if (\is_array($replacer)) {
            // Positional mapping, as str_replace() would apply it:
            // 1st entry replaces "\r\n", 2nd replaces "\r", 3rd replaces "\n";
            // a missing entry removes that line ending.
            $replacements = \array_values($replacer);
            $pairs = [
                "\r\n" => (string) ($replacements[0] ?? ''),
                "\r"   => (string) ($replacements[1] ?? ''),
                "\n"   => (string) ($replacements[2] ?? ''),
            ];
        } else {
            $replacement = (string) $replacer;
            $pairs = [
                "\r\n" => $replacement,
                "\r"   => $replacement,
                "\n"   => $replacement,
            ];
        }

        // strtr() works in a single pass (longest key first) and never re-scans text it
        // has just inserted. A sequential str_replace() would replace the "\r" and "\n"
        // of an inserted "\r\n" again, e.g. "a\r\nb" => "a\r\r\n\r\nb".
        return \strtr($str, $pairs);
    }
4662
4663
    /**
4664
     * Normalize some MS Word special characters.
4665
     *
4666
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
4667
     *
4668
     * @param string $str <p>The string to be normalized.</p>
4669
     *
4670 61
     * @psalm-pure
4671
     *
4672
     * @return string
4673
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
4674
     */
4675
    public static function normalize_msword(string $str): string
    {
        // Thin wrapper: the replacement itself is done by the ASCII helper class.
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4679
4680
    /**
4681
     * Normalize the whitespace.
4682
     *
4683
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
4684
     *
4685
     * @param string $str                          <p>The string to be normalized.</p>
4686
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
4687
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
4688
     *                                             bidirectional text chars.</p>
4689
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
4690
     *
4691
     * @psalm-pure
4692
     *
4693
     * @return string
4694
     *                <p>A string with normalized whitespace.</p>
4695
     */
4696
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        // Thin wrapper: all flags are forwarded unchanged to the ASCII helper class.
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }
4709
4710 27
    /**
4711
     * Calculates Unicode code point of the given UTF-8 encoded character.
4712 27
     *
4713 5
     * INFO: opposite to UTF8::chr()
4714
     *
4715
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
4716 27
     *
4717 27
     * @param string $chr      <p>The character of which to calculate code point.</p>
4718 27
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
4719
     *
4720
     * @psalm-pure
4721
     *
4722 11
     * @return int
4723 3
     *             <p>Unicode code point of the given character,<br>
4724
     *             0 on invalid UTF-8 byte sequence</p>
4725
     */
4726 11
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * Memo of already computed results, keyed by "<input>_<encoding>".
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The key is built from the caller's input, before any re-encoding below.
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // First choice: the lookup table loaded via getData('ord').
        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $code = \IntlChar::ord($chr);
            // IntlChar::ord() signals failure with null; compare strictly so that a
            // legitimate code point 0 is not mistaken for a failure.
            if ($code !== null) {
                return $CHAR_CACHE[$cache_key] = $code;
            }
        }

        //
        // fallback via vanilla php
        //

        // Only the first four bytes can belong to one UTF-8 sequence; unpack('C*')
        // yields them as a 1-indexed list of integers.
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        if ($bytes === false || $bytes === []) {
            return $CHAR_CACHE[$cache_key] = 0;
        }

        /** @var int[] $bytes */
        $code = $bytes[1];

        // 4-byte sequence: lead byte >= 0xF0
        if ($code >= 0xF0 && isset($bytes[4])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        }

        // 3-byte sequence: lead byte >= 0xE0
        if ($code >= 0xE0 && isset($bytes[3])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        }

        // 2-byte sequence: lead byte >= 0xC0
        if ($code >= 0xC0 && isset($bytes[2])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xC0) << 6) + $bytes[2] - 0x80);
        }

        // Single byte (or a truncated sequence): the first byte value is returned as is.
        return $CHAR_CACHE[$cache_key] = $code;
    }
4795 2
4796
    /**
4797
     * Parses the string into an array (into the second parameter).
4798 2
     *
4799 2
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
4800
     *          if the second parameter is not set!
4801 2
     *
4802
     * EXAMPLE: <code>
4803
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
4804
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
4805
     * </code>
4806
     *
4807
     * @see http://php.net/manual/en/function.parse-str.php
4808
     *
4809
     * @param string $str        <p>The input string.</p>
4810
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
4811
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4812
     *
4813
     * @psalm-pure
4814
     *
4815
     * @return bool
4816
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
4817
     */
4818
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        $query = $clean_utf8 ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($query, $result);

            // An empty result is reported as a failure.
            return $result !== [];
        }

        $parsed = \mb_parse_str($query, $result);

        // Success needs both a non-false return value and a non-empty result.
        return $parsed !== false && $result !== [];
    }
4837
4838
    /**
4839
     * Checks if \u modifier is available that enables Unicode support in PCRE.
4840
     *
4841
     * @psalm-pure
4842
     *
4843
     * @return bool
4844
     *              <p>
4845
     *              <strong>true</strong> if support is available,<br>
4846
     *              <strong>false</strong> otherwise
4847
     *              </p>
4848
     */
4849
    public static function pcre_utf8_support(): bool
    {
        // Probe with an empty pattern that uses the "u" modifier; the warning is silenced.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe = @\preg_match('//u', '');

        return (bool) $probe;
    }
4854
4855
    /**
4856
     * Create an array containing a range of UTF-8 characters.
4857 2
     *
4858 2
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
4859
     *
4860
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
4861 2
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
4862
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
4863
     *                              "is_numeric"</p>
4864
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
4865
     * @param float|int  $step      [optional] <p>
4866 1
     *                              If a step value is given, it will be used as the
4867
     *                              increment between elements in the sequence. step
4868
     *                              should be given as a positive number. If not specified,
4869
     *                              step will default to 1.
4870
     *                              </p>
4871
     *
4872
     * @psalm-pure
4873 1
     *
4874
     * @return string[]
4875
     */
4876
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        // Empty, 0 and '0' boundaries always produce an empty range.
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // ctype_*() must be fed strings: integers from -128 to 255 are interpreted as
        // ASCII codes, and non-string arguments are deprecated since PHP 8.1.
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        // Both boundaries must agree on the notation, decimal is checked first.
        $is_digit = $use_ctype
                    && \ctype_digit($var1_string)
                    && \ctype_digit($var2_string);
        $is_xdigit = !$is_digit
                     && $use_ctype
                     && \ctype_xdigit($var1_string)
                     && \ctype_xdigit($var2_string);

        if ($is_digit) {
            $start = (int) $var1;
        } elseif ($is_xdigit) {
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // Anything else is treated as a character.
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        // Each code point of the numeric range is converted back into a character.
        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4948
4949 6
    /**
4950 4
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
4951
     *
4952
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
4953 6
     *
4954
     * e.g:
4955 6
     * 'test+test'                     => 'test+test'
4956
     * 'D&#252;sseldorf'               => 'Düsseldorf'
4957 5
     * 'D%FCsseldorf'                  => 'Düsseldorf'
4958
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
4959
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
4960
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
4961
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
4962 5
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
4963 5
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
4964 5
     *
4965 5
     * @param string $str          <p>The input string.</p>
4966
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
4967
     *
4968 5
     * @psalm-pure
4969
     *
4970
     * @return string
4971
     *                <p>The decoded URL, as a string.</p>
4972
     */
4973 1
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass always runs; with $multi_decode the pass is repeated
        // until the string stops changing.
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $previous !== $str);

        return self::fix_simple_utf8($str);
    }
5009 18
5010
    /**
5011
     * Replaces all occurrences of $pattern in $str by $replacement.
5012
     *
5013 18
     * @param string $str         <p>The input string.</p>
5014 18
     * @param string $pattern     <p>The regular expression pattern.</p>
5015 18
     * @param string $replacement <p>The string to replace with.</p>
5016 18
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
5017
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
5018
     *
5019
     * @psalm-pure
5020
     *
5021
     * @return string
5022
     */
5023
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // The option string "msr" is mapped to "ms".
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback: a falsy delimiter is replaced by "/"
        $boundary = $delimiter ?: '/';

        // The "u" modifier is always added in front of the caller's modifiers.
        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5045
5046
    /**
5047 9
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
5048
     *
5049 9
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
5050
     *
5051
     * @param string $str <p>The input string.</p>
5052
     *
5053 54
     * @psalm-pure
5054
     *
5055
     * @return string
5056
     *                <p>A string without UTF-BOM.</p>
5057
     */
5058
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_length = \strlen($str);

        // Every known BOM is tried in turn on whatever is left of the string.
        foreach (self::$BOM as $bom => $bom_size) {
            if (\strncmp($str, $bom, $bom_size) !== 0) {
                continue;
            }

            /** @var false|string $without_bom - needed for PhpStan (stubs error) */
            $without_bom = \substr($str, $bom_size, $remaining_length);
            if ($without_bom === false) {
                return '';
            }

            $remaining_length -= $bom_size;
            $str = (string) $without_bom;
        }

        return $str;
    }
5081
5082
    /**
5083
     * Removes duplicate occurrences of a string in another string.
5084 2
     *
5085
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
5086
     *
5087
     * @param string          $str  <p>The base string.</p>
5088
     * @param string|string[] $what <p>String to search for in the base string.</p>
5089
     *
5090
     * @psalm-pure
5091
     *
5092
     * @return string
5093
     *                <p>A string with removed duplicates.</p>
5094
     */
5095
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        // A single needle is handled like a list with one entry.
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_array($what)) {
            foreach ($what as $item) {
                $item = (string) $item;

                // Collapse every run of the needle into one occurrence. The replacement
                // is the captured group, not the raw needle: preg_replace() interprets
                // "$0", "\0", "$1", ... inside a replacement string as back-references,
                // so a needle such as '$0' would otherwise re-insert the whole run.
                $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
            }
        }

        return $str;
    }
5112
5113
    /**
5114
     * Remove html via "strip_tags()" from the string.
5115
     *
5116 6
     * @param string $str            <p>The input string.</p>
5117
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
5118 6
     *                               should not be stripped. Default: null
5119
     *                               </p>
5120
     *
5121
     * @psalm-pure
5122
     *
5123
     * @return string
5124
     *                <p>A string without html tags.</p>
5125
     */
5126
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        // Thin wrapper around strip_tags(); tags listed in $allowable_tags are kept.
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5130
5131
    /**
5132
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
5133
     *
5134
     * @param string $str         <p>The input string.</p>
5135
     * @param string $replacement [optional] <p>Default is an empty string.</p>
5136
     *
5137
     * @psalm-pure
5138
     *
5139
     * @return string
5140
     *                <p>A string without breaks.</p>
5141
     */
5142
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // Alternatives, tried left to right:
        //   \r\n         - CRLF is matched as ONE break, so it is replaced only once
        //   \r | \n      - lone CR or LF
        //   <br\b[^>]*>  - <br>, <br/>, <br />, <BR class="x">, ... but not other tags
        //                  that merely start with "br" (e.g. <brand>)
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
5146
5147
    /**
5148
     * Remove invisible characters from a string.
5149
     *
5150
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
5151 96
     *
5152 96
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
5153
     *
5154
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
5155
     *
5156
     * @param string $str                           <p>The input string.</p>
5157
     * @param bool   $url_encoded                   [optional] <p>
5158
     *                                              Try to remove url encoded control character.
5159
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
5160
     *                                              <br>
5161
     *                                              Default: false
5162
     *                                              </p>
5163
     * @param string $replacement                   [optional] <p>The replacement character.</p>
5164
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
5165
     *
5166
     * @psalm-pure
5167
     *
5168
     * @return string
5169
     *                <p>A string without invisible chars.</p>
5170
     */
5171 12
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        // Thin wrapper: all arguments are forwarded unchanged to the ASCII helper class.
        $cleaned = ASCII::remove_invisible_characters(
            $str,
            $url_encoded,
            $replacement,
            $keep_basic_control_characters
        );

        return $cleaned;
    }
5184 4
5185
    /**
5186
     * Returns a new string with the prefix $substring removed, if present.
5187
     *
5188 2
     * @param string $str       <p>The input string.</p>
5189
     * @param string $substring <p>The prefix to remove.</p>
5190 2
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5191 2
     *
5192 2
     * @psalm-pure
5193 2
     *
5194
     * @return string
5195
     *                <p>A string without the prefix $substring.</p>
5196
     */
5197
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a plain truthiness check would also treat
        // the prefix "0" as "nothing to remove".
        if ($substring === '') {
            return $str;
        }

        // Byte-wise prefix test, limited to the length of the prefix.
        if (\strncmp($str, $substring, \strlen($substring)) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // Cut off as many characters as the prefix has.
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5226
5227 2
    /**
5228
     * Returns a new string with the suffix $substring removed, if present.
5229 2
     *
5230 2
     * @param string $str
5231 2
     * @param string $substring <p>The suffix to remove.</p>
5232 2
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5233
     *
5234
     * @psalm-pure
5235
     *
5236
     * @return string
5237 6
     *                <p>A string having a $str without the suffix $substring.</p>
5238
     */
5239
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a plain truthiness check would also treat
        // the suffix "0" as "nothing to remove".
        if ($substring === '') {
            return $str;
        }

        // Byte-wise suffix test: the last strlen($substring) bytes must match exactly.
        if (\substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // Keep everything except the trailing characters of the suffix.
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5265
5266
    /**
     * Replaces every occurrence of $search in $str with $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-sensitive replacement is byte-wise; the insensitive variant is delegated to str_ireplace().
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5291
5292
    /**
     * Replaces every occurrence of each element of $search in $str with $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string (or per-element strings) to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-sensitive replacement is byte-wise; the insensitive variant is delegated to str_ireplace().
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5317 35
5318
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if ($replacement_char === '') {
                // Drop invalid sequences entirely.
                $replacement_char_helper = 'none';
            } elseif (\strlen($replacement_char) === 1 && \ord($replacement_char) < 0x80) {
                // A single ASCII byte is its own code point.
                $replacement_char_helper = \ord($replacement_char);
            } else {
                // \ord() only sees the first byte, which is not the code point of a multi-byte
                // (or multi-character) replacement. Mark invalid sequences with U+FFFD instead;
                // the str_replace() at the end swaps every U+FFFD for $replacement_char.
                $replacement_char_helper = 0xFFFD;
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the "finally" block
             */
            $save = \mb_substitute_character();

            try {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
                @\mb_substitute_character($replacement_char_helper);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // The substitute character is process-wide mbstring state: always put it back.
                \mb_substitute_character($save);
            }
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
5377 14
5378
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped; whitespace is stripped when null.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars === null) {
            $pattern = '[\\s]+$';
        } else {
            // The pattern handed to regex_replace() additionally gets "/" escaped,
            // the mb_ereg pattern does not (same quoting as before).
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $has_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            // Plain concatenation instead of "${chars}" interpolation, which is deprecated since PHP 8.2.
            $pattern = '[' . $quoted_chars . ']+$';
        }

        if ($has_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5418
5419
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the
     * internal support table.
     *
     * @param bool $useEcho [optional] <p>Also echo the generated HTML. Default: true</p>
     *
     * @return string
     *                <p>The generated HTML; it is returned whether or not it was echoed.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $html = '<pre>';

        // Iterate by value: the table is only read here, so no reference into the shared static is needed.
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5445
5446 2
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // The ASCII check is only evaluated when the caller asked to keep ASCII as-is.
        $return_unchanged = $keep_ascii_chars && ASCII::is_ascii($char);

        return $return_unchanged
            ? $char
            : '&#' . self::ord($char, $encoding) . ';';
    }
5479
5480
    /**
     * Replaces every run of $tab_length consecutive spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5500
5501
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        $skip_normalization = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$skip_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // Without language or length constraints the plain mb_* function is used directly.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-cases one regex match; $clean is only forwarded on the self::strtoupper() path.
         *
         * @param string $chunk
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($chunk);
            }

            return \mb_strtoupper($chunk, $encoding);
        };

        // Pass 1: drop each separator run and upper-case the character that follows it (if any).
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // Pass 2: upper-case each digit run together with the character that follows it.
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5594
5595
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * Whitespace is collapsed first; the capitalization helper is then applied
     * with ' ' and afterwards with '-' as its second argument.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $space_pass = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_pass, '-');
    }
5616
5617
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * An empty $needle is always reported as contained.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // Answer the empty needle up front: PHP 8 reports it as found, while strpos()/mb_stripos()
        // on PHP 7 raise an "Empty needle" warning and report "not found".
        if ($needle === '') {
            return true;
        }

        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        if (\PHP_VERSION_ID >= 80000) {
            /** @phpstan-ignore-next-line - only for PHP8 */
            return \str_contains($haystack, $needle);
        }

        return \strpos($haystack, $needle) !== false;
    }
5647 24
5648 12
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles list or an empty needle yields false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains all $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Compare against '' instead of testing truthiness, so the needle "0" is searched for.
            $needle = \is_array($needle) ? '' : (string) $needle;
            if ($needle === '') {
                return false;
            }

            // Exactly one lookup per needle: byte-wise when case-sensitive, mb_stripos() otherwise.
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5685
5686
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped; an empty $haystack or $needles list yields false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains at least one of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Compare against '' instead of testing truthiness, so the needle "0" is searched for.
            $needle = \is_array($needle) ? '' : (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5729
5730
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * Shorthand for str_delimit() with '-' as the delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5746
5747
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // The two regex steps use mb_ereg when mbstring is available and PCRE otherwise.
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // Step 1: put a dash in front of every uppercase letter that is not at a word boundary.
        if ($has_mbstring) {
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // Step 2: lowercase; the plain mb_strtolower() is used only for the default UTF-8 setup.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        if ($use_mb_functions && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // Step 3: collapse every run of dashes, underscores and whitespace into the delimiter.
        if ($has_mbstring) {
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5798
5799
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        // Step 1: binary-looking input (UTF-16 / UTF-32 / PDF / images / ...).
        $without_bom = !self::string_has_bom($str);
        if (self::is_binary($str, $without_bom)) {
            $utf32_result = self::is_utf32($str, false);
            if ($utf32_result === 1 || $utf32_result === 2) {
                return $utf32_result === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            $utf16_result = self::is_utf16($str, false);
            if ($utf16_result === 1 || $utf16_result === 2) {
                return $utf16_result === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        // Step 2: plain ASCII.
        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        // Step 3: valid UTF-8.
        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        // Step 4: ask "mb_detect_encoding()" with a fixed candidate order.
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"
        $candidate_encodings = [
            'ISO-8859-1',
            'ISO-8859-2',
            'ISO-8859-3',
            'ISO-8859-4',
            'ISO-8859-5',
            'ISO-8859-6',
            'ISO-8859-7',
            'ISO-8859-8',
            'ISO-8859-9',
            'ISO-8859-10',
            'ISO-8859-13',
            'ISO-8859-14',
            'ISO-8859-15',
            'ISO-8859-16',
            'WINDOWS-1251',
            'WINDOWS-1252',
            'WINDOWS-1254',
            'CP932',
            'CP936',
            'CP950',
            'CP866',
            'CP850',
            'CP51932',
            'CP50220',
            'CP50221',
            'CP50222',
            'ISO-2022-JP',
            'ISO-2022-KR',
            'JIS',
            'JIS-ms',
            'EUC-CN',
            'EUC-JP',
        ];

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidate_encodings, true);
            if ($detected) {
                return $detected;
            }
        }

        // Step 5: round-trip through "iconv()"; the first encoding that leaves the bytes unchanged wins.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5929 9
5930
    /**
     * Check if the string ends with the given substring.
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // Every string, including the empty one, ends with the empty string.
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // Byte-wise comparison of the last strlen($needle) bytes.
            return \substr($haystack, -\strlen($needle)) === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
5962 6
5963
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty substring counts as a match, as in str_ends_with()
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with one of the $substrings.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $substring) {
            // The empty suffix is handled explicitly: substr($str, -0) returns the whole
            // string, so the byte comparison alone would never report it as a match.
            if (
                $substring === ''
                ||
                \substr($str, -\strlen($substring)) === $substring
            ) {
                return true;
            }
        }

        return false;
    }
5990
5991
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // Byte-wise prefix test; an empty $substring falls through and is "prepended" as a no-op.
        $already_prefixed = $substring !== ''
            && \strncmp($str, $substring, \strlen($substring)) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
6014
6015
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // Nothing to do only when both strings are non-empty and the last bytes already match.
        if (
            $str !== ''
            &&
            $substring !== ''
            &&
            \substr($str, -\strlen($substring)) === $substring
        ) {
            return $str;
        }

        return $str . $substring;
    }
6039
6040
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // Order matters: '_id' has to be removed before the remaining underscores become spaces.
        $without_id = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($spaced));
    }
6066 12
6067
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // Every string, including the empty one, ends with the empty string.
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // Take as many trailing bytes as the needle has and compare them case-insensitively.
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6094
6095
    /**
6096
     * Returns true if the string ends with any of $substrings, false otherwise.
6097
     *
6098
     * - case-insensitive
6099
     *
6100
     * @param string   $str        <p>The input string.</p>
6101
     * @param string[] $substrings <p>Substrings to look for.</p>
6102
     *
6103
     * @psalm-pure
6104
     *
6105
     * @return bool
6106
     *              <p>Whether or not $str ends with $substring.</p>
6107
     */
6108
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: nothing is written back into $substrings, and a
        // by-reference loop would leave a dangling reference behind.
        // An empty list simply skips the loop and yields false.
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6122 4
6123 4
    /**
6124 4
     * Inserts $substring into the string at the $index provided.
6125
     *
6126
     * @param string $str       <p>The input string.</p>
6127 4
     * @param string $substring <p>String to be inserted.</p>
6128
     * @param int    $index     <p>The index at which to insert the substring.</p>
6129 4
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
6130 4
     *
6131 1
     * @psalm-pure
6132
     *
6133
     * @return string
6134 3
     */
6135 3
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // Generic path for every encoding other than the literal 'UTF-8'.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $total = (int) self::strlen($str, $encoding);
            if ($index > $total) {
                // Index lies behind the end of the string: nothing to insert.
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $total, $encoding);

            return $head . $substring . $tail;
        }

        // Fast path: plain mbstring calls.
        $total = (int) \mb_strlen($str);
        if ($index > $total) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $total);

        return $head . $substring . $tail;
    }
6164
6165
    /**
6166
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
6167
     *
6168
     * EXAMPLE: <code>
6169
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
6170
     * </code>
6171
     *
6172
     * @see http://php.net/manual/en/function.str-ireplace.php
6173
     *
6174 29
     * @param string|string[] $search      <p>
6175
     *                                     Every replacement with search array is
6176 29
     *                                     performed on the result of previous replacement.
6177
     *                                     </p>
6178
     * @param string|string[] $replacement <p>The replacement.</p>
6179 29
     * @param string|string[] $subject     <p>
6180 29
     *                                     If subject is an array, then the search and
6181 29
     *                                     replace is performed with every entry of
6182 6
     *                                     subject, and the return value is an array as
6183
     *                                     well.
6184 24
     *                                     </p>
6185
     * @param int             $count       [optional] <p>
6186
     *                                     The number of matched and replaced needles will
6187
     *                                     be returned in count which is passed by
6188
     *                                     reference.
6189
     *                                     </p>
6190 29
     *
6191 1
     * @psalm-pure
6192
     *
6193
     * @return string|string[]
6194 29
     *                         <p>A string or an array of replacements.</p>
6195 1
     *
6196
     * @template TStrIReplaceSubject
6197
     * @phpstan-param TStrIReplaceSubject $subject
6198
     * @phpstan-return TStrIReplaceSubject
6199
     */
6200
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Turn every search term into a case-insensitive, UTF-8 aware pattern.
        // An empty term becomes a pattern that can never match, so it is a no-op
        // (same as the native str_ireplace).
        $patterns = [];
        foreach ((array) $search as $key => $term) {
            $term = (string) $term;
            $patterns[$key] = $term === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($term, '/') . '/ui';
        }

        // preg_replace() interprets "$n", "${n}" and "\n" inside the replacement
        // as back-references. This method promises plain-text replacement, so
        // backslashes and dollar signs are escaped to be taken literally.
        // A null replacement (fallback for PHP8) is cast to ''.
        $escape = static function ($value): string {
            return \strtr((string) $value, ['\\' => '\\\\', '$' => '\\$']);
        };
        $replacement = \is_array($replacement)
            ? \array_map($escape, $replacement)
            : $escape($replacement);

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6232 2
6233
    /**
6234
     * Replaces $search from the beginning of string with $replacement.
6235 11
     *
6236 11
     * @param string $str         <p>The input string.</p>
6237 10
     * @param string $search      <p>The string to search for.</p>
6238
     * @param string $replacement <p>The replacement.</p>
6239
     *
6240 1
     * @psalm-pure
6241
     *
6242
     * @return string
6243
     *                <p>The string after the replacement.</p>
6244
     */
6245
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // An empty search never "matches"; the replacement is appended.
        // This also covers an empty $str.
        if ($search === '') {
            return $str . $replacement;
        }

        // Compare character-wise (not byte-wise) and fold case with mbstring,
        // so that non-ASCII letters such as "Ö"/"ö" are treated as equal too.
        $search_length = (int) \mb_strlen($search);
        $prefix = (string) \mb_substr($str, 0, $search_length);

        if (\mb_strtolower($prefix) === \mb_strtolower($search)) {
            return $replacement . \mb_substr($str, $search_length);
        }

        return $str;
    }
6268 2
6269
    /**
6270
     * Replaces $search from the ending of string with $replacement.
6271 11
     *
6272 9
     * @param string $str         <p>The input string.</p>
6273
     * @param string $search      <p>The string to search for.</p>
6274
     * @param string $replacement <p>The replacement.</p>
6275 11
     *
6276
     * @psalm-pure
6277
     *
6278
     * @return string
6279
     *                <p>The string after the replacement.</p>
6280
     */
6281
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // An empty search never "matches"; the replacement is appended.
        // This also covers an empty $str.
        if ($search === '') {
            return $str . $replacement;
        }

        // A search longer than the string cannot be its ending. Checking this
        // up front avoids out-of-range offsets (a ValueError on PHP 8).
        $search_length = (int) \mb_strlen($search);
        if ($search_length > (int) \mb_strlen($str)) {
            return $str;
        }

        // Compare character-wise and fold case with mbstring, so that
        // non-ASCII letters such as "Ö"/"ö" are treated as equal too.
        $suffix = (string) \mb_substr($str, -$search_length);
        if (\mb_strtolower($suffix) === \mb_strtolower($search)) {
            return \mb_substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
6303 13
6304
    /**
6305
     * Check if the string starts with the given substring, case-insensitive.
6306
     *
6307
     * EXAMPLE: <code>
6308
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
6309
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
6310
     * </code>
6311
     *
6312
     * @param string $haystack <p>The string to search in.</p>
6313
     * @param string $needle   <p>The substring to search for.</p>
6314
     *
6315
     * @psalm-pure
6316
     *
6317
     * @return bool
6318
     */
6319 5
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // An empty needle matches everything; otherwise an empty haystack
        // cannot start with a non-empty needle.
        if ($needle === '' || $haystack === '') {
            return $needle === '';
        }

        // The haystack starts with the needle when the first case-insensitive
        // hit is located at position 0.
        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
6331 5
6332
    /**
6333
     * Returns true if the string begins with any of $substrings, false otherwise.
6334
     *
6335 1
     * - case-insensitive
6336
     *
6337
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
6339
     *
6340
     * @psalm-pure
6341
     *
6342
     * @return bool
6343
     *              <p>Whether or not $str starts with $substring.</p>
6344
     */
6345
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // An empty input string never starts with anything here.
        if ($str === '') {
            return false;
        }

        // Iterate by value: nothing is written back into $substrings, and a
        // by-reference loop would leave a dangling reference behind.
        // An empty list simply skips the loop and yields false.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6363 1
6364 1
    /**
6365 1
     * Gets the substring after the first occurrence of a separator.
6366 1
     *
6367
     * @param string $str       <p>The input string.</p>
6368
     * @param string $separator <p>The string separator.</p>
6369
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6370
     *
6371
     * @psalm-pure
6372
     *
6373
     * @return string
6374
     */
6375
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // Skip the separator itself and return everything behind it.
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        // Locate the separator in the same encoding that is used for cutting,
        // otherwise the offset and the substring would disagree.
        // NOTE(review): assumes self::stripos() accepts ($haystack, $needle, $offset, $encoding)
        // like the self::strripos() call in str_isubstr_before_last_separator() - confirm.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6403 1
6404 1
    /**
6405 1
     * Gets the substring after the last occurrence of a separator.
6406 1
     *
6407
     * @param string $str       <p>The input string.</p>
6408
     * @param string $separator <p>The string separator.</p>
6409
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6410
     *
6411
     * @psalm-pure
6412
     *
6413
     * @return string
6414
     */
6415
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::strripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // Skip the separator itself and return everything behind it.
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        // Locate the separator in the same encoding that is used for cutting,
        // otherwise the offset and the substring would disagree.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6443 1
6444 1
    /**
6445
     * Gets the substring before the first occurrence of a separator.
6446
     *
6447
     * @param string $str       <p>The input string.</p>
6448
     * @param string $separator <p>The string separator.</p>
6449
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6450
     *
6451
     * @psalm-pure
6452
     *
6453
     * @return string
6454
     */
6455
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // Everything in front of the first (case-insensitive) hit.
            return (string) \mb_substr($str, 0, $offset);
        }

        // Locate the separator in the same encoding that is used for cutting,
        // otherwise the offset and the substring would disagree.
        // NOTE(review): assumes self::stripos() accepts ($haystack, $needle, $offset, $encoding)
        // like the self::strripos() call in str_isubstr_before_last_separator() - confirm.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6475
6476 1
    /**
6477
     * Gets the substring before the last occurrence of a separator.
6478
     *
6479
     * @param string $str       <p>The input string.</p>
6480
     * @param string $separator <p>The string separator.</p>
6481
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6482
     *
6483
     * @psalm-pure
6484
     *
6485
     * @return string
6486
     */
6487
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Generic path for every encoding other than the literal 'UTF-8'.
        if ($encoding !== 'UTF-8') {
            $position = self::strripos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr($str, 0, $position, $encoding);
        }

        // Fast path: plain mbstring calls.
        $position = \mb_strripos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
6512
6513 2
    /**
6514 2
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
6515
     *
6516
     * @param string $str           <p>The input string.</p>
6517
     * @param string $needle        <p>The string to look for.</p>
6518
     * @param bool   $before_needle [optional] <p>Default: false</p>
6519 2
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6520 2
     *
6521
     * @psalm-pure
6522
     *
6523 2
     * @return string
6524
     */
6525
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search in, or nothing to search for.
        if ($needle === '' || $str === '') {
            return '';
        }

        // Delegate to the case-insensitive stristr(); a miss (false) is
        // reported as an empty string.
        $part = self::stristr($str, $needle, $before_needle, $encoding);

        return $part === false ? '' : $part;
    }
6551
6552 1
    /**
6553 1
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
6554
     *
6555
     * @param string $str           <p>The input string.</p>
6556
     * @param string $needle        <p>The string to look for.</p>
6557
     * @param bool   $before_needle [optional] <p>Default: false</p>
6558 1
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6559 1
     *
6560
     * @psalm-pure
6561
     *
6562 1
     * @return string
6563
     */
6564
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search in, or nothing to search for.
        if ($needle === '' || $str === '') {
            return '';
        }

        // Delegate to the case-insensitive strrichr(); a miss (false) is
        // reported as an empty string.
        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        return $part === false ? '' : $part;
    }
6590
6591 4
    /**
6592
     * Returns the last $n characters of the string.
6593
     *
6594
     * @param string $str      <p>The input string.</p>
6595
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
6596
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6597
     *
6598
     * @psalm-pure
6599
     *
6600
     * @return string
6601
     */
6602
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // No input, or a non-positive amount of characters requested.
        if ($str === '' || $n <= 0) {
            return '';
        }

        // Generic path for every encoding other than the literal 'UTF-8'.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // A negative start counts from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
6619
6620
    /**
6621
     * Limit the number of characters in a string.
6622 2
     *
6623
     * @param string $str        <p>The input string.</p>
6624
     * @param int    $length     [optional] <p>Default: 100</p>
6625
     * @param string $str_add_on [optional] <p>Default: …</p>
6626
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6627
     *
6628
     * @psalm-pure
6629
     *
6630
     * @return string
6631
     */
6632
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough already: return the input unchanged.
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Reserve room for the add-on. Never let the kept length become
            // negative (add-on longer than $length), because a negative length
            // would cut from the end of the string instead of limiting it.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the input string.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6659 2
6660 2
    /**
6661
     * Limit the number of characters in a string, but also after the next word.
6662
     *
6663 2
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
6664 2
     *
6665
     * @param string $str        <p>The input string.</p>
6666
     * @param int    $length     [optional] <p>Default: 100</p>
6667 2
     * @param string $str_add_on [optional] <p>Default: …</p>
6668
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6669 2
     *
6670 2
     * @psalm-pure
6671
     *
6672 2
     * @return string
6673 2
     */
6674
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        // Generic path for every encoding other than the literal 'UTF-8'.
        if ($encoding !== 'UTF-8') {
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // The limit falls exactly behind a word: cut in front of the space.
            if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
            }

            $truncated = self::substr($str, 0, $length, $encoding);
            if ($truncated === false) {
                return $str_add_on;
            }

            // Drop the last (possibly incomplete) word.
            $words = \implode(' ', \explode(' ', $truncated, -1));
            if ($words === '') {
                // No word boundary inside the limit: hard cut instead.
                return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $str_add_on;
            }

            return $words . $str_add_on;
        }

        // Fast path: plain mbstring calls.
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // The limit falls exactly behind a word: cut in front of the space.
        if (\mb_substr($str, $length - 1, 1) === ' ') {
            return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
        }

        $truncated = \mb_substr($str, 0, $length);

        // Drop the last (possibly incomplete) word.
        $words = \implode(' ', \explode(' ', $truncated, -1));
        if ($words === '') {
            // No word boundary inside the limit: hard cut instead.
            return ((string) \mb_substr($truncated, 0, $length - 1)) . $str_add_on;
        }

        return $words . $str_add_on;
    }
6726 5
6727 4
    /**
6728
     * Returns the longest common prefix between the $str1 and $str2.
6729
     *
6730 4
     * @param string $str1     <p>The input string.</p>
6731
     * @param string $str2     <p>Second string for comparison.</p>
6732 4
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6733
     *
6734 3
     * @psalm-pure
6735
     *
6736 3
     * @return string
6737
     */
6738
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        // Generic path for every encoding other than the literal 'UTF-8'.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // Only the length of the shorter string has to be inspected.
            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );

            $i = 0;
            while ($i < $limit) {
                $char = self::substr($str1, $i, 1, $encoding);

                // Stop at the first position where the strings differ.
                if ($char === false || $char !== self::substr($str2, $i, 1, $encoding)) {
                    break;
                }

                $prefix .= $char;
                ++$i;
            }

            return $prefix;
        }

        // Fast path: plain mbstring calls.
        $limit = (int) \min(
            \mb_strlen($str1),
            \mb_strlen($str2)
        );

        $i = 0;
        while ($i < $limit) {
            $char = \mb_substr($str1, $i, 1);

            // Stop at the first position where the strings differ.
            if ($char === false || $char !== \mb_substr($str2, $i, 1)) {
                break;
            }

            $prefix .= $char;
            ++$i;
        }

        return $prefix;
    }
6790 9
6791 4
    /**
6792 4
     * Returns the longest common substring between the $str1 and $str2.
6793
     * In the case of ties, it returns that which occurs first.
6794 5
     *
6795
     * @param string $str1
6796 5
     * @param string $str2     <p>Second string for comparison.</p>
6797 5
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6798
     *
6799
     * @psalm-pure
6800
     *
6801 9
     * @return string
6802
     *                <p>The longest common substring of $str1 and $str2.</p>
6803
     */
6804
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Evaluated after the normalization above, exactly like the later
        // branch checks of the previous implementation.
        $is_utf8 = $encoding === 'UTF-8';

        // Extract the characters of both strings once (n + m substr calls)
        // instead of re-extracting them for every cell of the table.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $is_utf8
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $is_utf8
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        // $len: length of the best match so far,
        // $end: 1-based position in $str1 right behind that match.
        $len = 0;
        $end = 0;

        // Each row only depends on the previous one, so two rows are enough.
        $previous = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $current = [0];
            $str_char = $str_chars[$i - 1];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $current[$j] = $previous[$j - 1] + 1;

                    // Strictly greater: in case of ties the first match wins.
                    if ($current[$j] > $len) {
                        $len = $current[$j];
                        $end = $i;
                    }
                } else {
                    $current[$j] = 0;
                }
            }

            $previous = $current;
        }

        if ($is_utf8) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6881
6882 4
    /**
6883 4
     * Returns the longest common suffix between the $str1 and $str2.
6884 4
     *
6885
     * @param string $str1
6886
     * @param string $str2     <p>Second string for comparison.</p>
6887 4
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6888
     *
6889 4
     * @psalm-pure
6890
     *
6891 3
     * @return string
6892
     */
6893 3
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        // Generic path for every encoding other than the literal 'UTF-8'.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // Only the length of the shorter string has to be inspected.
            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );

            $i = 1;
            while ($i <= $limit) {
                // Walk backwards: -1 is the last character, -2 the one before, ...
                $char = self::substr($str1, -$i, 1, $encoding);

                if ($char === false || $char !== self::substr($str2, -$i, 1, $encoding)) {
                    break;
                }

                $suffix = $char . $suffix;
                ++$i;
            }

            return $suffix;
        }

        // Fast path: plain mbstring calls.
        $limit = (int) \min(
            \mb_strlen($str1, $encoding),
            \mb_strlen($str2, $encoding)
        );

        $i = 1;
        while ($i <= $limit) {
            // Walk backwards: -1 is the last character, -2 the one before, ...
            $char = \mb_substr($str1, -$i, 1);

            if ($char === false || $char !== \mb_substr($str2, -$i, 1)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$i;
        }

        return $suffix;
    }
6948
6949
    /**
6950
     * Returns true if $str matches the supplied pattern, false otherwise.
6951
     *
6952
     * @param string $str     <p>The input string.</p>
6953 6
     * @param string $pattern <p>Regex pattern to match against.</p>
6954
     *
6955
     * @psalm-pure
6956 6
     *
6957
     * @return bool
6958 6
     *              <p>Whether or not $str matches the pattern.</p>
6959 3
     */
6960
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // The pattern is wrapped in "/" delimiters and evaluated in UTF-8 mode.
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error;
        // only a real match counts.
        return \preg_match($regex, $str) === 1;
    }
6964
6965
    /**
6966
     * Returns whether or not a character exists at an index. Offsets may be
6967
     * negative to count from the last character in the string. Implements
6968
     * part of the ArrayAccess interface.
6969
     *
6970
     * @param string $str      <p>The input string.</p>
6971
     * @param int    $offset   <p>The index to check.</p>
6972
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6973
     *
6974
     * @psalm-pure
6975
     *
6976
     * @return bool
6977
     *              <p>Whether or not the index exists.</p>
6978
     */
6979
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = (int) self::strlen($str, $encoding);

        // Non-negative offsets are zero-based from the start; negative
        // offsets count from the end, so -$length is the first character.
        return $offset >= 0
            ? $length > $offset
            : $length >= \abs($offset);
    }
6990 2
6991
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // measure the string in the requested encoding, so the bounds check agrees
        // with the char_at() lookup below (and with str_offset_exists())
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7023
7024
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an int nor one of 'left', 'right', 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string. The input is returned unchanged when it is
     *                already longer than $pad_length, or when $pad_length is 0 or $pad_string is empty.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to pad to, or nothing to pad with
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // the plain 'UTF-8' case talks to mbstring directly, everything else
        // goes through the encoding-aware helpers of this class
        $use_mb = $encoding === 'UTF-8';
        if ($use_mb) {
            $str_length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $str_length = (int) self::strlen($str, $encoding);
        }

        if ($pad_length < $str_length) {
            return $str;
        }

        $diff = $pad_length - $str_length;

        // repeats the pad string $repeats times and cuts the result down to $chars characters
        $build_padding = static function (int $repeats, int $chars) use ($pad_string, $encoding, $use_mb): string {
            $material = \str_repeat($pad_string, $repeats);

            if ($use_mb) {
                return (string) \mb_substr($material, 0, $chars);
            }

            return (string) self::substr($material, 0, $chars, $encoding);
        };

        if ($pad_type === \STR_PAD_BOTH) {
            // an odd difference puts the extra character on the right side
            $left_length = (int) \floor($diff / 2);
            $right_length = (int) \ceil($diff / 2);

            $pre = $build_padding($left_length, $left_length);
            $post = $build_padding($right_length, $right_length);

            return $pre . $str . $post;
        }

        if ($use_mb) {
            $ps_length = (int) \mb_strlen($pad_string);
        } else {
            $ps_length = (int) self::strlen($pad_string, $encoding);
        }

        $padding = $build_padding((int) \ceil($diff / $ps_length), $diff);

        if ($pad_type === \STR_PAD_LEFT) {
            return $padding . $str;
        }

        // STR_PAD_RIGHT and any unknown integer value
        return $str . $padding;
    }
7191 11
7192
    /**
     * Pads both sides of the string so that the result has the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // "both" is the string alias str_pad() maps to STR_PAD_BOTH
        return self::str_pad($str, $length, $pad_str, 'both', $encoding);
    }
7220 7
7221
    /**
     * Pads the beginning of the string so that the result has the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // "left" is the string alias str_pad() maps to STR_PAD_LEFT
        return self::str_pad($str, $length, $pad_str, 'left', $encoding);
    }
7249 7
7250
    /**
     * Pads the end of the string so that the result has the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // "right" is the string alias str_pad() maps to STR_PAD_RIGHT
        return self::str_pad($str, $length, $pad_str, 'right', $encoding);
    }
7278 9
7279
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7308
7309
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // plain delegation: $count is handed through by reference to the native function
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7368
7369
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * The comparison is done on bytes. When $search is empty, $replacement is
     * appended to $str; when $str does not start with $search, $str is returned as is.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // an empty needle: the replacement is simply appended (also covers an empty $str)
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_length = \strlen($search);
        $starts_with_search = \strncmp($str, $search, $prefix_length) === 0;

        return $starts_with_search
            ? $replacement . \substr($str, $prefix_length)
            : $str;
    }
7407
7408
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * The comparison is done on bytes. When $search is empty, $replacement is
     * appended to $str; when $str does not end with $search (including the case
     * that $search is longer than $str), $str is returned as is.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // an empty needle: the replacement is simply appended (also covers an empty $str)
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // a needle longer than the haystack can never be its suffix; checking this up front
        // avoids handing an out-of-range offset to a string function
        if ($search_length > \strlen($str)) {
            return $str;
        }

        if (\substr($str, -$search_length) !== $search) {
            return $str;
        }

        return \substr($str, 0, -$search_length) . $replacement;
    }
7445 2
7446
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * The subject is returned unchanged when "$search" is not found.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);

        if ($first_pos === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, $search_length);
    }
7480
7481
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * The subject is returned unchanged when "$search" is not found.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);

        if ($last_pos === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, $search_length);
    }
7514 5
7515
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // an empty string has nothing to shuffle; without this guard range(0, -1)
        // would produce the bogus indexes 0 and -1
        if ($str === '') {
            return '';
        }

        // the plain 'UTF-8' case talks to mbstring directly, everything else
        // goes through the encoding-aware helpers of this class
        $use_mb = $encoding === 'UTF-8';
        if ($use_mb) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        // shuffle the character positions, then rebuild the string in that order
        $indexes = \range(0, $length - 1);
        \shuffle($indexes);

        $shuffled_str = '';
        foreach ($indexes as $index) {
            $char = $use_mb
                ? \mb_substr($str, $index, 1)
                : self::substr($str, $index, 1, $encoding);

            if ($char !== false) {
                $shuffled_str .= $char;
            }
        }

        return $shuffled_str;
    }
7562 1
7563 6
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative values of $start and $end are computed
     * from the end of the string.
     *
     * @param string   $str      <p>The input string.</p>
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string when $end does not lie
     *                      behind $start.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // the plain 'UTF-8' case talks to mbstring directly, everything else
        // goes through the encoding-aware helpers of this class
        $use_mb = $encoding === 'UTF-8';
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the string length is only needed for an open end or for negative indexes
        $str_length = 0;
        if ($start < 0 || $end === null || $end < 0) {
            $str_length = $use_mb
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);
        }

        // turn a negative start into an absolute index (clamped to the first character),
        // otherwise "$end - $start" below would yield a far too large length
        if ($start < 0) {
            $start += $str_length;
            if ($start < 0) {
                $start = 0;
            }
        }

        if ($end === null) {
            $length = $str_length;
        } else {
            // turn a negative end into an absolute index as well
            if ($end < 0) {
                $end += $str_length;
            }

            // an end at or before the start selects nothing
            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        }

        if ($use_mb) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7614
7615
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every uppercase letter is lowercased and
     * prefixed with "_", plain digits are surrounded by "_", and repeated or
     * leading / trailing "_" are removed afterwards.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // numbers and uppercase letters; no "|" inside the character class,
            // it would be taken literally and match pipe characters as well
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // a plain digit is separated on both sides
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7684
7685 2
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        /** @var int[] $code_points */
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicate values
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7716
7717 1
    /**
     * Convert every entry of the input array to an array of Unicode characters
     * by passing it to "UTF8::str_split()". The array keys are kept.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        /** @var string[][] $chunks */
        $chunks = [];

        foreach ($input as $key => $value) {
            $chunks[$key] = self::str_split(
                $value,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        return $chunks;
    }
7755 3
7756
    /**
7757
     * Convert a string to an array of unicode characters.
7758
     *
7759
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
7760
     *
7761 95
     * @param int|string $input                   <p>The string or int to split into array.</p>
7762 95
     * @param int        $length                  [optional] <p>Max character length of each array
7763
     *                                            element.</p>
7764
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
7765
     *                                            string.</p>
7766
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
7767
     *                                            "mb_substr"</p>
7768
     *
7769
     * @psalm-pure
7770
     *
7771
     * @return string[]
7772
     *                  <p>An array containing chunks of chars from the input.</p>
7773
     */
7774 95
    public static function str_split(
7775
        $input,
7776 95
        int $length = 1,
7777 14
        bool $clean_utf8 = false,
7778
        bool $try_to_use_mb_functions = true
7779
    ): array {
7780 92
        if ($length <= 0) {
7781 25
            return [];
7782
        }
7783
7784
        // this is only an old fallback
7785 92
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
7786
        /** @var int|int[]|string|string[] $input */
7787 92
        $input = $input;
7788
        if (\is_array($input)) {
7789 87
            /** @psalm-suppress InvalidReturnStatement */
7790
            /** @phpstan-ignore-next-line - old code :/ */
7791
            return self::str_split_array(
0 ignored issues
show
Bug Best Practice introduced by
The expression return self::str_split_a...ry_to_use_mb_functions) returns the type array<mixed,string[]> which is incompatible with the documented return type string[].
Loading history...
7792
                $input,
7793 87
                $length,
7794 87
                $clean_utf8,
7795 87
                $try_to_use_mb_functions
7796
            );
7797
        }
7798
7799
        // init
7800
        $input = (string) $input;
7801
7802
        if ($input === '') {
7803
            return [];
7804
        }
7805
7806
        if ($clean_utf8) {
7807
            $input = self::clean($input);
7808
        }
7809
7810 29
        if (
7811 22
            $try_to_use_mb_functions
7812 22
            &&
7813 22
            self::$SUPPORT['mbstring'] === true
7814
        ) {
7815
            if (\function_exists('mb_str_split')) {
7816
                /**
7817
                 * @psalm-suppress ImpureFunctionCall - why?
7818 9
                 */
7819 9
                $return = \mb_str_split($input, $length);
7820
                if ($return !== false) {
7821 9
                    return $return;
7822 9
                }
7823 9
            }
7824
7825 8
            $i_max = \mb_strlen($input);
7826
            if ($i_max <= 127) {
7827 8
                $ret = [];
7828
                for ($i = 0; $i < $i_max; ++$i) {
7829 4
                    $ret[] = \mb_substr($input, $i, 1);
7830 4
                }
7831
            } else {
7832 4
                $return_array = [];
7833
                \preg_match_all('/./us', $input, $return_array);
7834
                $ret = $return_array[0] ?? [];
7835 6
            }
7836
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
7837 6
            $return_array = [];
7838
            \preg_match_all('/./us', $input, $return_array);
7839
            $ret = $return_array[0] ?? [];
7840 6
        } else {
7841
7842 6
            // fallback
7843
7844 6
            $ret = [];
7845
            $len = \strlen($input);
7846 6
7847
            for ($i = 0; $i < $len; ++$i) {
7848
                if (($input[$i] & "\x80") === "\x00") {
7849
                    $ret[] = $input[$i];
7850
                } elseif (
7851
                    isset($input[$i + 1])
7852
                    &&
7853
                    ($input[$i] & "\xE0") === "\xC0"
7854
                ) {
7855
                    if (($input[$i + 1] & "\xC0") === "\x80") {
7856
                        $ret[] = $input[$i] . $input[$i + 1];
7857
7858
                        ++$i;
7859
                    }
7860
                } elseif (
7861
                    isset($input[$i + 2])
7862
                    &&
7863
                    ($input[$i] & "\xF0") === "\xE0"
7864
                ) {
7865
                    if (
7866
                        ($input[$i + 1] & "\xC0") === "\x80"
7867
                        &&
7868 29
                        ($input[$i + 2] & "\xC0") === "\x80"
7869 2
                    ) {
7870 2
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2];
7871 2
7872 2
                        $i += 2;
7873 2
                    }
7874
                } elseif (
7875
                    isset($input[$i + 3])
7876
                    &&
7877 29
                    ($input[$i] & "\xF8") === "\xF0"
7878
                ) {
7879
                    if (
7880
                        ($input[$i + 1] & "\xC0") === "\x80"
7881 29
                        &&
7882
                        ($input[$i + 2] & "\xC0") === "\x80"
7883
                        &&
7884
                        ($input[$i + 3] & "\xC0") === "\x80"
7885
                    ) {
7886
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];
7887
7888
                        $i += 3;
7889
                    }
7890
                }
7891
            }
7892
        }
7893
7894
        if ($length > 1) {
7895
            return \array_map(
7896
                static function (array $item): string {
7897
                    return \implode('', $item);
7898 16
                },
7899
                \array_chunk($ret, $length)
7900 16
            );
7901 2
        }
7902
7903
        if (isset($ret[0]) && $ret[0] === '') {
7904 14
            return [];
7905 1
        }
7906
7907
        return $ret;
7908 13
    }
7909 13
7910 8
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * The pattern is interpreted as a regular expression on every code path:
     * via "mb_split()" if mbstring is available, otherwise via "preg_split()".
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string (without delimiters).</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings, an empty array if $limit is 0 or if the split failed.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $parts = \mb_split($pattern, $str);
        } else {
            // Use the pattern as a regex here too (it was "preg_quote()"-ed before,
            // which made the result depend on whether mbstring is loaded).
            // Pick a delimiter that does not occur in the pattern, so that the
            // pattern never needs to be escaped.
            $delimiter = "\x01";
            foreach (['/', '~', '#', '%', '@', '!'] as $candidate) {
                if (\strpos($pattern, $candidate) === false) {
                    $delimiter = $candidate;

                    break;
                }
            }

            // one extra piece is requested, it collects the "rest" and is cut off below
            $parts = \preg_split(
                $delimiter . $pattern . $delimiter . 'u',
                $str,
                $limit > 0 ? $limit + 1 : -1
            );
        }

        if ($parts === false) {
            return [];
        }

        // a positive limit truncates the result, a negative limit means "no limit"
        if ($limit > 0) {
            return \array_slice($parts, 0, $limit);
        }

        return $parts;
    }
7979 19
7980
    /**
     * Check if the string starts with the given substring (case-sensitive, byte-wise).
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Always true for an empty needle, otherwise false for an empty haystack.</p>
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // an empty needle matches everything, an empty haystack only matches an empty needle
        if ($needle === '' || $haystack === '') {
            return $needle === '';
        }

        if (\PHP_VERSION_ID < 80000) {
            // compare only the first "strlen($needle)" bytes
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
8012 2
8013
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring, always false for an empty $str or
     *              an empty list of substrings.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified here, so a reference is not needed
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
8044 1
8045
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything after the first separator, or an empty string if $str or $separator
     *                is empty or if the separator was not found.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // go through "self::substr()" like the other "str_substr_*_separator()" methods,
        // instead of handing the raw $encoding directly to "mb_substr()"
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8086
8087 1
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything after the last separator, or an empty string if $str or $separator
     *                is empty or if the separator was not found.</p>
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $last_pos = self::strrpos($str, $separator, 0, $encoding);

            return $last_pos === false
                ? ''
                : (string) self::substr(
                    $str,
                    $last_pos + (int) self::strlen($separator, $encoding),
                    null,
                    $encoding
                );
        }

        // fast path for UTF-8: use the "mb_*" functions directly
        $last_pos = \mb_strrpos($str, $separator);

        return $last_pos === false
            ? ''
            : (string) \mb_substr($str, $last_pos + (int) \mb_strlen($separator));
    }
8131
8132 1
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything before the first separator, or an empty string if $str or $separator
     *                is empty or if the separator was not found.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $first_pos = self::strpos($str, $separator, 0, $encoding);

            return $first_pos === false
                ? ''
                : (string) self::substr($str, 0, $first_pos, $encoding);
        }

        // fast path for UTF-8: use the "mb_*" functions directly
        $first_pos = \mb_strpos($str, $separator);

        return $first_pos === false
            ? ''
            : (string) \mb_substr($str, 0, $first_pos);
    }
8177 1
8178 1
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything before the last separator, or an empty string if $str or $separator
     *                is empty or if the separator was not found.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $last_pos = self::strrpos($str, $separator, 0, $encoding);
            if ($last_pos === false) {
                return '';
            }

            // the encoding is normalized only after the search and before the cut
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $last_pos, $normalized_encoding);
        }

        // fast path for UTF-8: use the "mb_*" functions directly
        $last_pos = \mb_strrpos($str, $separator);

        return $last_pos === false
            ? ''
            : (string) \mb_substr($str, 0, $last_pos);
    }
8222 1
8223 1
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Without "$before_needle" the returned part starts with the needle itself.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str or $needle is empty or if the needle was not found.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // UTF-8 uses "mb_strstr()" directly, every other encoding goes through the class wrapper
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8269 1
8270 1
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str or $needle is empty or if the needle was not found.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // UTF-8 uses "mb_strrchr()" directly, every other encoding goes through the class wrapper
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8316
8317
    /**
     * Wraps $str in the given substring, which is added once in front of it and once behind it.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
8332 10
8333
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that are neither whitespace nor
     * one of the "$word_define_chars".
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                           whitespace separator === words.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are only used if no language-specific handling was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // compare explicitly against null and '': a truthiness check would also drop the separator "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are returned untouched
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8428 1
8429
    /**
     * Convert a string into a obfuscate string.
     *
     * Randomly chosen chars are replaced by "$obfuscateChar" until roughly
     * "$percent" of the chars were processed. Chars listed in "$keepChars"
     * count as processed when they are picked, but they are never replaced.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the chars to process, values above 1.0 behave like 1.0.</p>
     * @param string   $obfuscateChar
     * @param string[] $keepChars
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscate string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // hide already existing obfuscate-chars behind a helper char, they are restored at the end
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);

        // only chars that differ from the obfuscate-char can ever be processed
        $charsEligible = 0;
        foreach ($chars as $char) {
            if ($char !== $obfuscateChar) {
                ++$charsEligible;
            }
        }

        // never ask for more chars than are available, otherwise the loop below could not terminate
        $charsMaxChange = \min($charsEligible, \round(\count($chars) * $percent));
        $charsCounter = 0;
        $charKeyDone = [];

        while ($charsCounter < $charsMaxChange) {
            // "foreach" works on a copy, so $char is always the original (not yet obfuscated) char
            foreach ($chars as $charKey => $char) {
                if ($charsCounter >= $charsMaxChange) {
                    break;
                }

                if (isset($charKeyDone[$charKey]) || $char === $obfuscateChar) {
                    continue;
                }

                // skip roughly every second candidate, so that the positions are random
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if (!\in_array($char, $keepChars, true)) {
                    $chars[$charKey] = $obfuscateChar;
                }
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
8495
8496
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. These words are appended as-is to the internal list of
     * "small words", which is used as a regex alternation.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex alternation of all "small words" (built-in fragments + the words from the caller)
        $small_words_rx = \implode(
            '|',
            \array_merge(
                [
                    '(?<!q&)a',
                    'an',
                    'and',
                    'as',
                    'at(?!&t)',
                    'but',
                    'by',
                    'en',
                    'for',
                    'if',
                    'in',
                    'of',
                    'on',
                    'or',
                    'the',
                    'to',
                    'v[.]?',
                    'via',
                    'vs[.]?',
                ],
                $ignore
            )
        );
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // a string without any lowercase char (e.g. all caps) is lower-cased first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        /**
         * Upper-case the first char of the first capture group.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_one = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        /**
         * Keep the first capture group and upper-case the first char of the second one.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_two = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups) use ($encoding): string {
                if ($groups[2]) {
                    // URLs, domains, emails and file paths stay as they are
                    $word = $groups[2];
                } elseif ($groups[3]) {
                    // small words are lower-cased
                    $word = self::strtolower($groups[3], $encoding);
                } elseif ($groups[4]) {
                    // words without internal caps are capitalized
                    $word = static::ucfirst($groups[4], $encoding);
                } else {
                    // every other kind of word stays as it is (iPhone)
                    $word = $groups[5];
                }

                // leading and trailing underscores are preserved
                return $groups[1] . $word . $groups[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $ucfirst_group_one,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $ucfirst_group_one,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below contains "…" although its comment talks about a hyphen - confirm the intent
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        return $str;
    }
8686
8687
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are read as one big number, so leading zero
     * bits are not part of the result and an empty string results in "0".
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Convert every hex digit on its own: "base_convert()" works with floats for
        // large numbers and therefore loses the lower bits of longer strings.
        /** @noinspection OffsetOperationsInspection */
        $bits = \strtr(
            $value[1],
            [
                '0' => '0000',
                '1' => '0001',
                '2' => '0010',
                '3' => '0011',
                '4' => '0100',
                '5' => '0101',
                '6' => '0110',
                '7' => '0111',
                '8' => '1000',
                '9' => '1001',
                'a' => '1010',
                'b' => '1011',
                'c' => '1100',
                'd' => '1101',
                'e' => '1110',
                'f' => '1111',
            ]
        );

        // same format as before: no leading zeros, but at least one digit
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8710
8711 16
    /**
     * Split a string into an array of lines.
     *
     * Every run of one or two CR / LF chars is handled as one line separator,
     * so "\r\n\r\n" contains an empty line while "\n\n" does not.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "nothing to split" and for a failed split
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        // the lines are only filtered if at least one of the filters is active
        if ($remove_empty_values || $remove_short_values !== null) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }
8750
8751 16
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result alternates
     * between "non-word" parts (even indexes) and words (odd indexes) as long as
     * no filtering is requested.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result for an empty input and for a failed split
        $fallback = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        // character class for "word" characters: letters plus the extra chars
        $char_list = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $fallback;
        }

        $needs_filtering = $remove_empty_values || $remove_short_values !== null;
        if (!$needs_filtering) {
            return $parts;
        }

        $filtered = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every value is a string, keys are kept as they are
        $words = [];
        foreach ($filtered as $key => $value) {
            $words[$key] = (string) $value;
        }

        return $words;
    }
8802 22
8803 10
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: If nothing of the input can be kept (the desired length is <= 0 or
     *       the substring needs the whole length), only $substring is returned,
     *       the same way UTF8::str_truncate_safe() handles this case.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);
            }

            // A negative length would be interpreted by "mb_substr()" as
            // "cut from the end" and produce a result longer than requested.
            if ($length <= 0) {
                return $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
        }

        // same guard as in the UTF-8 branch above
        if ($length <= 0) {
            return $substring;
        }

        return ((string) self::substr($str, 0, $length, $encoding)) . $substring;
    }
8862
8863 47
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * INFO: Only the space character (' ') is used as word boundary.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default: ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>If true, a text that has no space inside
     *                                                       the truncated part is cut in the middle of the word
     *                                                       instead of being removed completely. Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve room for the substring that gets appended at the end
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            /** @var false|string $cut - needed for PhpStan (stubs error) */
            $cut = \mb_substr($str, 0, $length);
            if ($cut === false) {
                return '';
            }

            // The cut is clean only if the first space at or after the last
            // kept character sits directly behind the kept part.
            $next_space = \mb_strpos($str, ' ', $length - 1);
            if ($next_space !== $length) {
                // a word was split: go back to the last space inside the kept part
                $last_space = \mb_strrpos($cut, ' ', 0);

                $keep_as_it_is = $last_space === false
                                 &&
                                 ($next_space === false || $ignore_do_not_split_words_for_one_word);
                if (!$keep_as_it_is) {
                    $cut = (string) \mb_substr($cut, 0, (int) $last_space);
                }
            }

            return $cut . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve room for the substring that gets appended at the end
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $cut = self::substr($str, 0, $length, $encoding);
        if ($cut === false) {
            return '';
        }

        // same word-boundary handling as in the UTF-8 branch
        $next_space = self::strpos($str, ' ', $length - 1, $encoding);
        if ($next_space !== $length) {
            $last_space = self::strrpos($cut, ' ', 0, $encoding);

            $keep_as_it_is = $last_space === false
                             &&
                             ($next_space === false || $ignore_do_not_split_words_for_one_word);
            if (!$keep_as_it_is) {
                $cut = (string) self::substr($cut, 0, (int) $last_space, $encoding);
            }
        }

        return $cut . $substring;
    }
8969
8970
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: This is a shortcut for UTF8::str_delimit() with "_" as delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8987 13
8988
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: The string is camelized via UTF8::str_camelize() first, then the
     *       first character is upper-cased via UTF8::ucfirst().
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // NOTE(review): only $str and $encoding are handed over to str_camelize(),
        // the remaining options are applied by ucfirst() only - confirm that this is intended.
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9015
9016
    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string, or the words themselves (format 1 and 2).</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the parts alternate: [non-word, word, non-word, word, ..., non-word]
        $parts = self::str_to_words($str, $char_list);
        $total = \count($parts);

        if ($format === 1) {
            $words = [];
            for ($index = 1; $index < $total; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        if ($format === 2) {
            $words = [];
            // the first word starts right behind the leading non-word part
            $position = (int) self::strlen($parts[0]);
            for ($index = 1; $index < $total; $index += 2) {
                $word = $parts[$index];
                $words[$position] = $word;
                // skip the word itself and the non-word part that follows it
                $position += (int) self::strlen($word) + (int) self::strlen($parts[$index + 1]);
            }

            return $words;
        }

        return (int) (($total - 1) / 2);
    }
9073 23
9074 23
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(), both strings are
     *       case-folded via UTF8::strtocasefold() before they are compared.
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_str1, $folded_str2);
    }
9116 24
9117
    /**
     * Case-sensitive string comparison.
     *
     * INFO: Both strings are normalized (NFD) before they are compared byte-wise,
     *       so canonically equivalent strings are treated as equal. A string that
     *       cannot be normalized is compared as it is.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized_str1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized_str2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" returns false for input that cannot be normalized,
        // fall back to the raw string instead of passing a non-string to "strcmp()".
        return \strcmp(
            \is_string($normalized_str1) ? $normalized_str1 : $str1,
            \is_string($normalized_str2) ? $normalized_str2 : $str2
        );
    }
9145 2
9146
    /**
     * Find length of initial segment not matching mask.
     *
     * INFO: The result is the number of characters at the start of the string
     *       (or of the part selected via $offset / $length) that come before the
     *       first character out of $char_list.
     *
     * @param string   $str
     * @param string   $char_list <p>The chars to look for, an empty list results in the length of the string.</p>
     * @param int      $offset    <p>Start of the part of the string that is examined.</p>
     * @param int|null $length    <p>Length of the part of the string that is examined or null for "up to the end".</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // reduce the string to the requested part first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $part = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $part = \mb_substr($str, $offset);
            } else {
                $part = \mb_substr($str, $offset, $length);
            }

            if ($part === false) {
                return 0;
            }

            $str = $part;
        }

        if ($str === '') {
            return 0;
        }

        // capture (non-greedy) everything in front of the first char out of the list
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            // no char out of the list was found: the whole string is the segment
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($found[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
9208
9209 4
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * INFO: Every value is cast via "(int)", so numeric strings are read as
     *       decimal numbers.
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string, or an empty string if the conversion failed.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        if (!\is_array($intOrHex)) {
            $intOrHex = [$intOrHex];
        }

        // build one numeric HTML entity per code point, e.g. "&#246;"
        $str = '';
        foreach ($intOrHex as $strPart) {
            $str .= '&#' . (int) $strPart . ';';
        }

        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        //
        // NOTE(review): the "HTML-ENTITIES" encoding of mbstring is deprecated as of PHP 8.2.
        $result = \mb_convert_encoding($str, 'UTF-8', 'HTML-ENTITIES');

        // "mb_convert_encoding()" is not guaranteed to return a string,
        // but the declared return type of this method is "string".
        return \is_string($result) ? $result : '';
    }
9244
9245
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function string_has_bom(string $str): bool
    {
        // self::$BOM is used as a map of "BOM bytes" => "byte length of the BOM".
        // It is iterated by value: a by-reference loop is not needed for a
        // read-only check and would turn the entries of the shared list into references.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, (string) $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9270 4
9271
    /**
     * Strip HTML and PHP tags from a string + optionally clean invalid UTF-8.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>", null, true); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // the cleanup runs before the tags are stripped
        $input = $clean_utf8 ? self::clean($str) : $str;

        // "null" is never passed on to the native function, the argument is left out instead
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9315
9316
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * INFO: Everything that matches "[[:space:]]" (in unicode mode) is removed.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
9337
9338
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * INFO: "mbstring" is used if it is available, otherwise the method falls back
     *       to "intl" (UTF-8 and non-negative offset only), to the native
     *       function (ASCII only) and finally to case-folding + UTF8::strpos().
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // An empty needle is a match at position 0 as of PHP 8,
        // older versions report "not found" for it.
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stripos($haystack, $needle, $offset);
            }

            return \mb_stripos(
                $haystack,
                $needle,
                $offset,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        $intl_is_usable = $encoding === 'UTF-8'
                          &&
                          $offset >= 0
                          &&
                          self::$SUPPORT['intl'] === true;
        if ($intl_is_usable) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            // "false" is not final here, the other fallbacks are tried as well
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9427 13
9428
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // An empty needle is a match as of PHP 8, older versions report "not found" for it.
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            return \mb_stristr(
                $haystack,
                $needle,
                $before_needle,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $is_utf8 = $encoding === 'UTF-8';

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_stristr()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $found = \grapheme_stristr($haystack, $needle, $before_needle);
            // "false" is not final here, the other fallbacks are tried as well
            if ($found !== false) {
                return $found;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via regex: capture (non-greedy) everything in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $before = $match[1];
        if ($before_needle) {
            return $before;
        }

        // the needle starts right behind the captured part
        $needle_position = (int) self::strlen($before, $encoding);

        return self::substr($haystack, $needle_position, null, $encoding);
    }
9533 174
9534
    /**
     * Count the characters of a string (not its bytes).
     *
     * INFO: use UTF8::strwidth() for the char-length
     *
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
     *
     * The first available backend wins, in this order: mbstring, plain byte
     * count (only for "CP850" / "ASCII"), iconv, intl (UTF-8 only), plain byte
     * count for pure ASCII input and finally a PCRE based count.
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in $str for the given $encoding
     *                   (one multi-byte character counts as 1).
     *                   <br>
     *                   Can return <strong>false</strong>, e.g. if mbstring is not installed and the
     *                   string contains invalid chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        // the two most common values are already in canonical form
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" report a wrong length
            // when $str contains invalid characters
            $str = self::clean($str);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // 2) binary || ascii only: one byte is one character
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        $no_converter_available = self::$SUPPORT['mbstring'] === false
                                  &&
                                  self::$SUPPORT['iconv'] === false;
        if ($encoding !== 'UTF-8' && $no_converter_available) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // 4) intl - INFO: "grapheme_strlen()" can't handle other encodings
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // 5) ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // 6) vanilla php: let PCRE split the string into characters
        //

        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        // nothing matched although $str is not empty
        return $char_count === 0 ? false : $char_count;
    }
9664
9665
    /**
     * Get the length of a string in bytes.
     *
     * @param string $str <p>The string to measure.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of bytes in $str.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        $use_mbstring = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mbstring
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
9687
9688 2
    /**
     * Case-insensitive string comparison using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * Both strings are case-folded via UTF8::strtocasefold() and the results
     * are handed to UTF8::strnatcmp().
     *
     * EXAMPLES: <code>
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
9719 4
9720
    /**
     * String comparison using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * Identical strings short-circuit to 0; otherwise both strings are passed
     * through UTF8::strtonatfold() before PHP's native strnatcmp() compares them.
     *
     * EXAMPLES: <code>
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            $left = (string) self::strtonatfold($str1);
            $right = (string) self::strtonatfold($str2);

            return \strnatcmp($left, $right);
        }

        // byte-identical input needs no folding at all
        return 0;
    }
9756
9757
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then compared
     * with UTF8::strncmp(), using the same $encoding for both steps.
     *
     * EXAMPLE: <code>
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        return self::strncmp(
            self::strtocasefold($str1, true, false, $encoding, null, false),
            self::strtocasefold($str2, true, false, $encoding, null, false),
            $len,
            // the "first n characters" must be cut in the caller's charset,
            // otherwise strncmp() silently falls back to UTF-8
            $encoding
        );
    }
9790
9791
    /**
     * String comparison of the first n characters.
     *
     * Both strings are shortened to their first $len characters and the two
     * heads are then compared with UTF8::strcmp().
     *
     * EXAMPLE: <code>
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            // any other charset goes through the encoding-aware helper
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);

            return self::strcmp($head1, $head2);
        }

        // UTF-8 is cut with "mb_substr()" directly
        $head1 = (string) \mb_substr($str1, 0, $len);
        $head2 = (string) \mb_substr($str2, 0, $len);

        return self::strcmp($head1, $head2);
    }
9832 2
9833
    /**
     * Search a string for any of a set of characters.
     *
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The string starting from the character found, or false if it is not found
     *                      (also false when $haystack or $char_list is empty).</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // the pattern is built from UTF8::rxClass($char_list)
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $found)) {
            return false;
        }

        // cut the haystack at the byte position of the matched text
        $byte_offset = (int) \strpos($haystack, $found[0]);

        return \substr($haystack, $byte_offset);
    }
9860
9861
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * INFO: use UTF8::strpos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
     *
     * The first available backend wins, in this order: mbstring, native
     * strpos() (only for "CP850" / "ASCII"), intl (UTF-8 and non-negative
     * offset only), iconv (non-negative offset only), native strpos() for pure
     * ASCII input and finally a byte search on the remaining haystack.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.
     *                               A negative offset counts from the end of the haystack.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string, always counted from the start of the haystack.<br> If needle is not found it returns
     *                   false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // PHP < 8 never finds anything in an empty haystack, whatever the needle is
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // only reachable on PHP >= 8, where an empty needle matches at position 0
            return $needle === '' ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // Turn the "from the end" offset into an absolute character offset, so
            // that the position returned below is relative to the start of the
            // haystack (like "mb_strpos()") and not to the start of the tail.
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $tail = self::substr($haystack, $offset, null, $encoding);
        if ($tail === false) {
            $tail = '';
        }
        $tail = (string) $tail;

        // byte position of the needle inside the tail
        $byte_pos = \strpos($tail, $needle);
        if ($byte_pos === false) {
            return false;
        }

        if ($byte_pos) {
            // convert the byte position into a character position
            return $offset + (int) self::strlen(\substr($tail, 0, $byte_pos), $encoding);
        }

        return $offset + 0;
    }
10034 2
10035
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to search for in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found (or haystack / needle is
     *                   empty), it returns false.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
10067 2
10068
    /**
     * Find the byte position of the first occurrence of a substring in a string, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to search for in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found (or haystack / needle is
     *                   empty), it returns false.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850')
            : \stripos($haystack, $needle, $offset);
    }
10100 2
10101
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * NOTE(review): the mbstring path hands the complete $needle to "mb_strrchr()",
     * while the fallbacks below search for the first character of $needle only -
     * confirm whether that difference is intended.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or false if needle is not found.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // invalid characters in front of $needle would shift the
            // positions reported by the mbstring / iconv functions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only (native "strrchr()" has no "before needle" mode)
        //

        if (!$before_needle && \in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) iconv or vanilla php: both look for the first character of $needle
        //    and differ only in how its last position is located
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $last_pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $first_char, $encoding)
            : self::strrpos($haystack, $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10231 8
10232
    /**
     * Reverses characters order in the string.
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * The input is passed through UTF8::emoji_encode() before and through
     * UTF8::emoji_decode() after the reversal.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            // any other charset: use the encoding-aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
10289 3
10290
    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * NOTE(review): the mbstring path hands the complete $needle to "mb_strrichr()",
     * while the fallback below searches for the first character of $needle only -
     * confirm whether that difference is intended.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // invalid characters in front of $needle would shift the
            // positions reported by the mbstring / iconv functions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) vanilla php: locate the last position of the first needle character
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10370 3
10371 3
    /**
     * Case-insensitive search for the last occurrence of a substring, returning its position.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to search in.</p>
     * @param int|string $needle     <p>The string to search for; a non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> position of the last occurrence of needle in the haystack
     *                   string.<br>false if needle is not found.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Early exit for an empty haystack, before the needle is touched:
        // PHP < 8 never matches, PHP >= 8 matches an empty needle at position 0.
        if ($haystack === '') {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // Same check again, now that the needle is guaranteed to be a string.
        if ($haystack === '') {
            return ($needle === '' && \PHP_VERSION_ID >= 80000) ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // "grapheme_strripos()" can't handle other encodings or a negative offset
        if (
            $encoding === 'UTF-8'
            &&
            $offset >= 0
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
10502
10503
    /**
     * Byte-wise, case-insensitive search for the last occurrence of a string within another.
     *
     * @param string $haystack <p>
     *                         The string in which the last occurrence of needle is searched.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (or one of the strings is empty).</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strripos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
10537
10538
    /**
     * Case-sensitive search for the last occurrence of a substring, returning its position.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> position of the last occurrence of needle in the haystack
     *                   string.<br>false if needle is not found.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Early exit for an empty haystack, before the needle is touched:
        // PHP < 8 never matches, PHP >= 8 matches an empty needle at position 0.
        if ($haystack === '') {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // Same check again, now that the needle is guaranteed to be a string.
        if ($haystack === '') {
            return ($needle === '' && \PHP_VERSION_ID >= 80000) ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // "grapheme_strrpos()" can't handle a negative offset or other encodings
        if (
            $offset >= 0
            &&
            $encoding === 'UTF-8'
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down according to the offset first: a positive offset drops
        // leading characters (and is added back to the result), a negative one drops
        // trailing characters.
        if ($offset !== 0) {
            if ($offset > 0) {
                $sliced = self::substr($haystack, $offset);
            } else {
                $sliced = self::substr($haystack, 0, $offset);
                $offset = 0;
            }

            $haystack = $sliced === false ? '' : (string) $sliced;
        }

        // Locate the needle by bytes ...
        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        // ... then convert the byte position into a character position
        // by measuring the part in front of the match.
        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
10697
10698
    /**
     * Byte-wise search for the last occurrence of a substring, returning its position.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. false if needle is not found (or one of the strings is empty).</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strrpos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
10732
10733
    /**
     * Length of the leading part of a string that consists only of characters from a given mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start position; the string is cut via substr before matching.</p>
     * @param int|null $length   [optional] <p>Length of the part to inspect; the string is cut via substr before matching.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Reduce the input to the requested window first.
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str = (string) \mb_substr($str, $offset);
            } else {
                $str = (string) \mb_substr($str, $offset, $length);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // Match the longest run of mask characters anchored at the start.
        $matches = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
            return 0;
        }

        return (int) self::strlen($matches[0], $encoding);
    }
10780 3
10781
    /**
     * Returns the part of haystack starting at the first occurrence of needle
     * (or, optionally, the part in front of it).
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Empty haystack: only PHP >= 8 "finds" an empty needle in it.
        if ($haystack === '') {
            return ($needle === '' && \PHP_VERSION_ID >= 80000) ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Empty needle: PHP >= 8 returns the whole haystack, older versions fail.
        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_part = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_part !== false) {
                return $grapheme_part;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture (non-greedy) everything in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $prefix = $match[1];

        return $before_needle
            ? $prefix
            : self::substr($haystack, (int) self::strlen($prefix));
    }
10914 2
10915
    /**
     * Byte-wise search for the first occurrence of a string within another, returning a part of haystack.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found (or one of the strings is empty).</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850'); // 8-BIT
    }
10956
10957
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences before any case handling
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // Without a language hint and with plain UTF-8 the mbstring functions
        // can be called directly; otherwise go through the UTF8 wrappers.
        $use_mb_directly = $lang === null && $encoding === 'UTF-8';

        if ($lower) {
            return $use_mb_directly
                ? \mb_strtolower($str)
                : self::strtolower($str, $encoding, false, $lang);
        }

        return $use_mb_directly
            ? \mb_strtoupper($str)
            : self::strtoupper($str, $encoding, false, $lang);
    }
11013
11014
    /**
     * Convert a string to lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences before any case handling
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // Language specific lowercasing needs the intl transliterator.
        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // load the list of known transliterator ids once
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11093
11094
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences before any case handling
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // load the list of known transliterator ids once
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is loaded in this branch, but it has no transliterator for
                    // the requested language: say so (same wording as in strtolower())
                    // and use the generic transliterator instead.
                    /**
                     * @psalm-suppress ImpureFunctionCall - this is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    $language_code = 'Any-Upper';
                }

                return (string) \transliterator_transliterate($language_code, $str);
            }

            // No intl: the "lang" parameter is ignored, warn under the correct function name.
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11173
11174 2
    /**
11175
     * Translate characters or replace sub-strings.
11176
     *
11177
     * EXAMPLE:
11178 2
     * <code>
11179
     * $array = [
11180
     *     'Hello'   => '○●◎',
11181
     *     '中文空白' => 'earth',
11182 2
     * ];
11183 2
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
11184 2
     * </code>
11185
     *
11186
     * @see http://php.net/manual/en/function.strtr.php
11187 2
     *
11188 2
     * @param string          $str  <p>The string being translated.</p>
11189
     * @param string|string[] $from <p>The string replacing from.</p>
11190
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
11191 2
     *
11192 2
     * @psalm-pure
11193
     *
11194 2
     * @return string
11195 2
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
11196 2
     *                to the corresponding character in "to".</p>
11197 2
     */
11198 2
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // replacing something with itself is a no-op
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // bring both sides into list form, so that they can be paired up
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            $count_from = \count($from);
            $count_to = \count($to);

            // the longer list is truncated to the length of the shorter one
            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep the result in its own variable, so that the error message below
            // can still show the original "$from" list instead of "false"
            $replace_pairs = \array_combine($from, $to);
            if ($replace_pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $replace_pairs;
        }

        // only reachable with a string "$from" when "$to" is the empty string
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        // "$from" is a "search => replace" map at this point
        return \strtr($str, $from);
    }
11240 2
11241
    /**
11242
     * Return the width of a string.
11243 2
     *
11244
     * INFO: use UTF8::strlen() for the byte-length
11245
     *
11246 2
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
11247
     *
11248
     * @param string $str        <p>The input string.</p>
11249
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11250
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11251
     *
11252
     * @psalm-pure
11253 2
     *
11254 2
     * @return int
11255 2
     */
11256
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are used as-is, everything else gets normalized
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip every char of the listed ranges and let PCRE count the removals;
        // each removed char contributes a width of two, each remaining char a width of one
        $wide_count = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        $remaining_length = (int) self::strlen($remaining);

        return ($wide_count << 1) + $remaining_length;
    }
11300
11301
    /**
11302
     * Get part of a string.
11303 172
     *
11304 8
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
11305
     *
11306
     * @see http://php.net/manual/en/function.mb-substr.php
11307 168
     *
11308
     * @param string   $str        <p>The string being checked.</p>
11309
     * @param int      $offset     <p>The first position used in str.</p>
11310 2
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
11311
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11312
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11313
     *
11314 168
     * @psalm-pure
11315 7
     *
11316
     * @return false|string
11317
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
11318 163
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
11319 19
     *                      characters long, <b>FALSE</b> will be returned.
11320
     */
11321
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fast path via mbstring (UTF-8 only)
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string: the offset points directly behind the last char
        // ("$length" can't be 0 here, so a strict null-check is sufficient)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        $length = $length ?? $str_length;

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via mbstring (every encoding that was not handled by the fast path)
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // without this branch a non UTF-8 string would be cut by the
            // UTF-8 only fallbacks below, although mbstring is available
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // pass the encoding explicitly, otherwise iconv uses its configured default charset
            $return_tmp = \iconv_substr($str, $offset, $length, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        // &&
        // extract relevant part, and join to make string again
        return \implode('', \array_slice(self::str_split($str), $offset, $length));
    }
11459
11460
    /**
11461 2
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
11462
     *
11463
     * EXAMPLE: <code>
11464
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
11465
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
11466
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
11467
     * </code>
11468
     *
11469
     * @param string   $str1               <p>The main string being compared.</p>
11470 2
     * @param string   $str2               <p>The secondary string being compared.</p>
11471
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
11472 2
     *                                     counting from the end of the string.</p>
11473
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
11474 2
     *                                     of the length of the str compared to the length of main_str less the
11475 2
     *                                     offset.</p>
11476 2
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
11477
     *                                     insensitive.</p>
11478 2
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
11479
     *
11480 2
     * @psalm-pure
11481
     *
11482
     * @return int
11483
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
11484
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
11485
     *             <strong>0</strong> if they are equal
11486
     */
11487
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // the strings are only cut if a window (offset and / or length) was requested
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // compare at most as many chars as the window of "$str1" contains
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // measure "$str1" in the same encoding that is used for cutting "$str2",
                // otherwise the char-count is taken with the default encoding of "strlen()"
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
11521
11522
    /**
11523
     * Count the number of substring occurrences.
11524
     *
11525
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
11526
     *
11527 5
     * @see http://php.net/manual/en/function.substr-count.php
11528 2
     *
11529
     * @param string   $haystack   <p>The string to search in.</p>
11530
     * @param string   $needle     <p>The substring to search for.</p>
11531 5
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
11532 2
     * @param int|null $length     [optional] <p>
11533 2
     *                             The maximum length after the specified offset to search for the
11534
     *                             substring. It outputs a warning if the offset plus the length is
11535
     *                             greater than the haystack length.
11536
     *                             </p>
11537
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11538
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11539 5
     *
11540 2
     * @psalm-pure
11541
     *
11542
     * @return false|int
11543 5
     *                   <p>This function returns an integer or false if there isn't a string.</p>
11544 2
     */
11545
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // an empty needle can't be counted
        if ($needle === '') {
            return false;
        }

        // nothing to search in (the result is the same for every PHP version)
        // or an explicitly empty search window
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // invalid byte sequences would distort the char positions,
            // so both strings are cleaned before they are used
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // cut the haystack down to the requested window first
        if ($offset !== 0 || $length > 0) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }

                $length = $haystack_length;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        $has_mbstring = self::$SUPPORT['mbstring'];

        if ($encoding !== 'UTF-8' && $has_mbstring === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if ($has_mbstring === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // vanilla fallback: count the matches of the literal (quoted) needle
        $pattern = '/' . \preg_quote($needle, '/') . '/us';
        \preg_match_all($pattern, $haystack, $found, \PREG_SET_ORDER);

        return \count($found);
    }
11619 4
11620
    /**
11621
     * Count the number of substring occurrences.
11622
     *
11623
     * @param string   $haystack <p>
11624
     *                           The string being checked.
11625 4
     *                           </p>
11626 1
     * @param string   $needle   <p>
11627
     *                           The string being found.
11628
     *                           </p>
11629
     * @param int      $offset   [optional] <p>
11630 3
     *                           The offset where to start counting
11631
     *                           </p>
11632 3
     * @param int|null $length   [optional] <p>
11633
     *                           The maximum length after the specified offset to search for the
11634
     *                           substring. It outputs a warning if the offset plus the length is
11635
     *                           greater than the haystack length.
11636
     *                           </p>
11637
     *
11638
     * @psalm-pure
11639
     *
11640
     * @return false|int
11641
     *                   <p>The number of times the
11642
     *                   needle substring occurs in the
11643
     *                   haystack string.</p>
11644
     */
11645
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // with "mbstring.func_overload" the window is cut manually,
        // because the count itself is done via "mb_substr_count()" below
        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                // output from "substr_count()" have changed in PHP 7.1
                // (PHP_VERSION_ID is "major * 10000 + minor * 100 + release", so 7.1.0 === 70100)
                \PHP_VERSION_ID < 70100
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        // no overload: the native function already works on bytes
        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
11701 7
11702
    /**
11703
     * Returns the number of occurrences of $substring in the given string.
11704
     * By default, the comparison is case-sensitive, but can be made insensitive
11705 7
     * by setting $case_sensitive to false.
11706 7
     *
11707 7
     * @param string $str            <p>The input string.</p>
11708
     * @param string $substring      <p>The substring to search for.</p>
11709
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
11710
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
11711 6
     *
11712
     * @psalm-pure
11713 6
     *
11714 3
     * @return int
11715
     */
11716
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        // nothing can be found in / with an empty string
        if ($str === '' || $substring === '') {
            return 0;
        }

        // default encoding: no normalization and no explicit encoding argument needed
        if ($encoding === 'UTF-8') {
            if (!$case_sensitive) {
                // compare both strings in their upper-case form
                $str_upper = \mb_strtoupper($str);
                $substring_upper = \mb_strtoupper($substring);

                return (int) \mb_substr_count($str_upper, $substring_upper);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$case_sensitive) {
            // compare both strings in their case-folded form
            $str_folded = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring_folded = self::strtocasefold($substring, true, false, $encoding, null, false);

            return (int) \mb_substr_count($str_folded, $substring_folded, $encoding);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
11749
11750 2
    /**
11751 2
     * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
11752
     *
11753
     * EXAMPLE: <code>
11754 2
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
11755
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
11756
     * </code>
11757
     *
11758
     * @param string $haystack <p>The string to search in.</p>
11759
     * @param string $needle   <p>The substring to search for.</p>
11760
     *
11761
     * @psalm-pure
11762
     *
11763
     * @return string
11764
     *                <p>Return the sub-string.</p>
11765
     */
11766
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, and an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // no case-insensitive prefix match: return the input untouched
        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many leading chars as the needle is long
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
11782
11783 1
    /**
11784
     * Get part of a string process in bytes.
11785
     *
11786
     * @param string   $str    <p>The string being checked.</p>
11787
     * @param int      $offset <p>The first position used in str.</p>
11788 1
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
11789
     *
11790
     * @psalm-pure
11791
     *
11792
     * @return false|string
11793
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
11794
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
11795
     *                      characters long, <b>FALSE</b> will be returned.
11796
     */
11797
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        if ($str === '' || $length === 0) {
            // empty string
            return '';
        }

        if ($offset === 0 && $length === null) {
            // whole string
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // no explicit length: use the largest 32-bit integer as "until the end"
            $byte_length = $length === null ? 2147483647 : $length;

            return \substr($str, $offset, $byte_length);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }
11816
11817 2
    /**
11818 2
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
11819
     *
11820
     * EXAMPLE: <code>
11821 2
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
11822
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
11823
     * </code>
11824
     *
11825
     * @param string $haystack <p>The string to search in.</p>
11826
     * @param string $needle   <p>The substring to search for.</p>
11827
     *
11828
     * @psalm-pure
11829
     *
11830
     * @return string
11831
     *                <p>Return the sub-string.</p>
11832
     */
11833
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, and an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // no case-insensitive suffix match: return the input untouched
        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the matched suffix
        $haystack_length = (int) self::strlen($haystack);
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystack_length - $needle_length);
    }
11849
11850 2
    /**
11851 2
     * Removes a prefix ($needle) from the beginning of the string ($haystack).
11852
     *
11853
     * EXAMPLE: <code>
11854 2
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
11855
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
11856
     * </code>
11857
     *
11858
     * @param string $haystack <p>The string to search in.</p>
11859
     * @param string $needle   <p>The substring to search for.</p>
11860
     *
11861
     * @psalm-pure
11862
     *
11863
     * @return string
11864
     *                <p>Return the sub-string.</p>
11865
     */
11866
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, and an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // no (case-sensitive) prefix match: return the input untouched
        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many leading chars as the needle is long
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
11882
11883
    /**
11884
     * Replace text within a portion of a string.
11885
     *
11886
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
11887
     *
11888
     * source: https://gist.github.com/stemar/8287074
11889
     *
11890
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
11891 10
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
11892
     * @param int|int[]       $offset      <p>
11893
     *                                     If start is positive, the replacing will begin at the start'th offset
11894
     *                                     into string.
11895
     *                                     <br><br>
11896
     *                                     If start is negative, the replacing will begin at the start'th character
11897
     *                                     from the end of string.
11898 10
     *                                     </p>
11899 1
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
11900
     *                                     portion of string which is to be replaced. If it is negative, it
11901
     *                                     represents the number of characters from the end of string at which to
11902 1
     *                                     stop replacing. If it is not given, then it will default to strlen(
11903 1
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
11904
     *                                     length is zero then this function will have the effect of inserting
11905 1
     *                                     replacement into string at the given start offset.</p>
11906
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
11907
     *
11908
     * @psalm-pure
11909 1
     *
11910 1
     * @return string|string[]
11911 1
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
11912 1
     *
11913
     * @template TSubstrReplace
11914 1
     * @phpstan-param TSubstrReplace $str
11915
     * @phpstan-return TSubstrReplace
11916 1
     */
11917
    public static function substr_replace(
11918
        $str,
11919
        $replacement,
11920 1
        $offset,
11921 1
        $length = null,
11922 1
        string $encoding = 'UTF-8'
11923 1
    ) {
11924 1
        if (\is_array($str)) {
11925 1
            $num = \count($str);
11926
11927 1
            // the replacement
11928
            if (\is_array($replacement)) {
11929 1
                $replacement = \array_slice($replacement, 0, $num);
11930
            } else {
11931
                $replacement = \array_pad([$replacement], $num, $replacement);
11932
            }
11933
11934 1
            // the offset
11935
            if (\is_array($offset)) {
11936
                $offset = \array_slice($offset, 0, $num);
11937 10
                foreach ($offset as &$value_tmp) {
11938 1
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
11939 1
                }
11940
                unset($value_tmp);
11941 1
            } else {
11942
                $offset = \array_pad([$offset], $num, $offset);
11943
            }
11944
11945
            // the length
11946 10
            if ($length === null) {
11947 10
                $length = \array_fill(0, $num, 0);
11948
            } elseif (\is_array($length)) {
11949 10
                $length = \array_slice($length, 0, $num);
11950
                foreach ($length as &$value_tmp_V2) {
11951
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
11952
                }
11953 10
                unset($value_tmp_V2);
11954
            } else {
11955
                $length = \array_pad([$length], $num, $length);
11956
            }
11957 10
11958 5
            // recursive call
11959
            /** @phpstan-ignore-next-line - phpstan currently can't handle recursive calls */
11960
            return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
11961 9
        }
11962 9
11963
        if (\is_array($replacement)) {
11964 9
            if ($replacement !== []) {
11965 1
                $replacement = $replacement[0];
11966 9
            } else {
11967 1
                $replacement = '';
11968
            }
11969
        }
11970 9
11971 1
        // init
11972 9
        $str = (string) $str;
11973 4
        $replacement = (string) $replacement;
11974
11975
        if (\is_array($length)) {
11976 9
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
11977 4
        }
11978
11979
        if (\is_array($offset)) {
11980 9
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
11981 9
        }
11982 9
11983
        if ($str === '') {
11984
            return $replacement;
11985
        }
11986
11987
        if (self::$SUPPORT['mbstring'] === true) {
11988
            $string_length = (int) self::strlen($str, $encoding);
11989
11990
            if ($offset < 0) {
11991
                $offset = (int) \max(0, $string_length + $offset);
11992
            } elseif ($offset > $string_length) {
11993
                $offset = $string_length;
11994
            }
11995
11996
            if ($length !== null && $length < 0) {
11997
                $length = (int) \max(0, $string_length - $offset + $length);
11998
            } elseif ($length === null || $length > $string_length) {
11999
                $length = $string_length;
12000
            }
12001
12002
            if (($offset + $length) > $string_length) {
12003
                $length = $string_length - $offset;
12004
            }
12005
12006
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
12007
                   $replacement .
12008
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
12009
        }
12010
12011
        //
12012
        // fallback for ascii only
12013
        //
12014
12015
        if (ASCII::is_ascii($str)) {
12016
            return ($length === null) ?
12017
                \substr_replace($str, $replacement, $offset) :
12018
                \substr_replace($str, $replacement, $offset, $length);
12019
        }
12020
12021
        //
12022
        // fallback via vanilla php
12023
        //
12024
12025
        \preg_match_all('/./us', $str, $str_matches);
12026
        \preg_match_all('/./us', $replacement, $replacement_matches);
12027
12028
        if ($length === null) {
12029
            $length_tmp = self::strlen($str, $encoding);
12030
            if ($length_tmp === false) {
12031
                // e.g.: non mbstring support + invalid chars
12032
                return '';
12033 2
            }
12034
            $length = $length_tmp;
12035
        }
12036
12037
        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);
12038 2
12039 2
        return \implode('', $str_matches[0]);
12040
    }
12041
12042 2
    /**
     * Strips $needle from the end of $haystack, if (and only if) $haystack ends with it.
     *
     * The comparison is case-sensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // '' for an empty haystack, the untouched haystack for an empty needle
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise suffix comparison; nothing to do if the haystack ends differently
        if (\substr($haystack, -\strlen($needle)) !== $needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
12091
12092 6
    /**
     * Swaps the case of every character: lowercase becomes uppercase and vice versa.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // run the input through "UTF8::clean()" before the case conversion
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // byte-wise XOR: a byte equal to its lowercase form turns into the uppercase
        // form and the other way round, bytes without case stay untouched
        return (string) ($lower ^ $upper ^ $str);
    }
12124
12125
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when the "mbstring" or "iconv" extension is not loaded,
     * but its function ("mb_strlen()" / "iconv()") exists anyway.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12152
12153
    /**
     * Replaces every tab character in the string with $tab_length spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without tab characters.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        return \str_replace("\t", \str_repeat(' ', $tab_length), $str);
    }
12173
12174
    /**
     * Title-cases the string: the first character of each word becomes uppercase,
     * all other characters become lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // run the input through "UTF8::clean()" before the case conversion
            $str = self::clean($str);
        }

        // a language or the "keep length" option is handed over to "str_titleize()"
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE, self::normalize_encoding($encoding, 'UTF-8'));
    }
12228
12229
    /**
     * Transliterates a string into ASCII (delegates to "ASCII::to_transliterate()").
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = ASCII::to_transliterate($str, $unknown, $strict);

        return $ascii;
    }
12250
12251
    /**
     * Interprets a scalar value as a boolean.
     *
     * <ul>
     * <li>"true", "1", "on", "yes" (case-insensitive) => true</li>
     * <li>"false", "0", "off", "no" (case-insensitive) => false</li>
     * <li>any other numeric value => true if it is greater than zero</li>
     * <li>empty or whitespace-only input => false</li>
     * <li>everything else => true</li>
     * </ul>
     *
     * Leading and trailing whitespace is ignored, so e.g. " false " or "off\n" is false.
     *
     * @param bool|float|int|string $str <p>The value to interpret.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // init: surrounding whitespace must not hide a keyword like " false "
        $str = \trim((string) $str);

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // all keys are lowercase, so one case-folded lookup covers the exact match too
        $key = \strtolower($str);
        if (isset($map[$key])) {
            return $map[$key];
        }

        if (\is_numeric($str)) {
            return ((float) $str) > 0;
        }

        // any other non-empty text counts as true
        return true;
    }
12294
12295
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Delegates to "ASCII::to_filename()".
     *
     * @param string $str               <p>The input string.</p>
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char     <p>The fallback character that is handed over to "ASCII::to_filename()".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        return ASCII::to_filename($str, $use_transliterate, $fallback_char);
    }
12318 8
12319 8
    /**
     * Convert a string (or every string of an array) into "ISO-8859"-encoding (Latin-1).
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str <p>Any string or array of strings; arrays are converted element by element.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @template TToIso8859
     * @phpstan-param TToIso8859 $str
     * @phpstan-return TToIso8859
     */
    public static function to_iso8859($str)
    {
        if (!\is_array($str)) {
            $str = (string) $str;

            return $str === '' ? '' : self::utf8_decode($str);
        }

        // convert each element in place, the keys are kept
        foreach ($str as $key => $value) {
            $str[$key] = self::to_iso8859($value);
        }

        return $str;
    }
12351
12352 41
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * Accepts a single string or an array of strings; every string is passed to
     * "UTF8::to_utf8_string()".
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @phpstan-param TToUtf8 $str
     * @phpstan-return TToUtf8
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $converted */
            $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $converted;
        }

        // convert each element in place, the keys are kept
        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        /** @phpstan-var TToUtf8 $str */
        return $str;
    }
12392
12393 41
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string</p>
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $result = '';

        $i = 0;
        while ($i < $byte_count) {
            $lead = $str[$i];

            // number of continuation bytes this byte announces, if it is a UTF-8 lead byte
            if ($lead < "\xC0") {
                $expected = 0; // 7-bit char or stray continuation byte
            } elseif ($lead <= "\xDF") {
                $expected = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $expected = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $expected = 3; // looks like 4 bytes UTF8
            } else {
                $expected = 0; // doesn't look like UTF8, but should be converted
            }

            if ($expected === 0) {
                if ($lead < "\x80") {
                    // it doesn't need conversion
                    $result .= $lead;
                } else {
                    // needs conversion
                    $result .= self::to_utf8_convert_helper($lead);
                }

                ++$i;

                continue;
            }

            // collect the announced continuation bytes (0x80 - 0xBF),
            // bytes beyond the end of the string count as "\x00"
            $tail = '';
            for ($k = 1; $k <= $expected; ++$k) {
                $next = $i + $k >= $byte_count ? "\x00" : $str[$i + $k];

                if ($next < "\x80" || $next > "\xBF") {
                    $tail = null;

                    break;
                }

                $tail .= $next;
            }

            if ($tail === null) {
                // not valid UTF8 - convert the lead byte only, the following bytes are inspected on their own
                $result .= self::to_utf8_convert_helper($lead);
                ++$i;
            } else {
                // yeah, almost sure it's UTF8 already
                $result .= $lead . $tail;
                $i += $expected + 1;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (!isset($matches[3])) {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $code_point = ((int) \hexdec($matches[1]) << 10)
                                  + (int) \hexdec($matches[2])
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                } else {
                    // single "\uXXXX" sequence
                    $code_point = (int) \hexdec($matches[3]);
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | $code_point >> 6) . (string) self::chr(0x80 | $code_point & 0x3F);
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" returns null on error
        if ($result === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
12524
12525
    /**
     * Casts a numeric string to an integer.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>The integer value, or null if the string isn't numeric.</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
12543 1
12544
    /**
     * Casts the input to a string, if that is possible without surprises.
     *
     * Strings, integers, floats and objects with a "__toString()" method are converted,
     * everything else (null, booleans, arrays, other objects, ...) results in null.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        return null;
    }
12583 50
12584
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped. If null, whitespace is
     *                           stripped; if empty, the string is returned unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if ($chars === '') {
            // nothing to strip - and an empty list would produce the bracket expression "[]" below
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
12629 69
12630 3
    /**
     * Uppercases the first character of the string and leaves the rest untouched.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with the first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // run the input through "UTF8::clean()" before the string is split
            $str = self::clean($str);
        }

        // without a language and without the "keep length" option the plain "mb_*" functions are used
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $rest = (string) \mb_substr($str, 1);
            $first_char = (string) \mb_substr($str, 0, 1);

            if ($use_mb_functions) {
                return \mb_strtoupper($first_char) . $rest;
            }

            return self::strtoupper(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($use_mb_functions) {
            $first_char = (string) \mb_substr($str, 0, 1, $encoding);

            return \mb_strtoupper($first_char, $encoding) . $rest;
        }

        $first_char = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $rest;
    }
12706
12707
    /**
     * Uppercase the first character of every word in the string.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that are appended unchanged (strict comparison).</p>
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
     *                             word; forwarded to str_to_words().</p>
     * @param string   $encoding   [optional] <p>Set the charset; forwarded to ucfirst().</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Compare against '' explicitly: the string "0" is falsy in PHP but is valid input.
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // The native function is only usable when neither extra word chars nor exceptions were given.
        // (Explicit '' comparison so that a "0" char list / exception is not mistaken for "nothing".)
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if ($use_php_default_functions && ASCII::is_ascii($str)) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // Skip only real empty chunks; a "0" chunk must be kept.
            if ($word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                $words_str .= $word;
            } else {
                $words_str .= self::ucfirst($word, $encoding);
            }
        }

        return $words_str;
    }
12775 4
12776 3
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible (repeat until the string stops changing).</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $decoded = self::urldecode_unicode_helper($str);

        // One pass always runs; further passes only in multi-decode mode and
        // only while a pass still changes the string.
        do {
            $previous = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($previous),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $previous !== $decoded);

        return self::fix_simple_utf8($decoded);
    }
12835
12836 14
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Characters that do not fit into a single byte are replaced by "?".
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>If true, the untouched input is returned whenever the decoded result
     *                                is not shorter (measured with self::strlen()) than the input.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';

        // The string is rewritten in place: $i is the read offset, $j the write offset ($j never overtakes $i).
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // Lead byte of a 2-byte sequence.
                    if ($i + 1 >= $len) {
                        // Truncated sequence at the end of the input: there is no continuation
                        // byte to read, so emit the placeholder instead of reading out of range.
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                case "\xF0":
                    // Lead byte of a 4-byte sequence: skip one extra byte, then share the 3-byte handling.
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (and 4-byte) sequences never fit into one byte.
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12908
12909
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The converted string, or an empty string if the conversion failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // "utf8_encode()" is deprecated as of PHP 8.2, so prefer the mbstring conversion
        // and only fall back to the legacy function when mbstring is not available.
        if (\function_exists('mb_convert_encoding')) {
            /** @var false|string $encoded - the polyfill maybe return false */
            $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
        } else {
            /** @var false|string $encoded - the polyfill maybe return false */
            $encoded = \utf8_encode($str);
        }

        return \is_string($encoded) ? $encoded : '';
    }
12935
12936
    /**
     * Returns the class-level table of UTF-8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        // plain accessor for the static lookup table
        return self::$WHITESPACE_TABLE;
    }
12951 2
12952
    /**
     * Limit the number of words in a string.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped part; appended only if something was cut off.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace, then up to $limit "word + trailing whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        // no match at all -> hand the input back untouched
        if (!isset($matches[0])) {
            return $str;
        }

        $head = $matches[0];

        // the match already covers the whole string -> nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
12986
12987
    /**
     * Wraps a string to a given number of characters
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        // Build a one-byte-per-character "mask" of the input so that the native,
        // byte based wordwrap() can compute the break positions:
        //   '#' = existing break, ' ' = space, '?' = any other character.
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach (\explode($break, $str) as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $result = '';
        $char_pos = 0;   // next not yet consumed entry of $chars
        $mask_pos = -1;  // current offset in the wrapped mask
        $break_pos = -1; // offset of the current '#' in the wrapped mask

        /** @noinspection PhpAssignmentInConditionInspection - is ok here */
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy everything in front of the break
            for (++$mask_pos; $mask_pos < $break_pos; ++$mask_pos) {
                if (isset($chars[$char_pos])) {
                    $result .= $chars[$char_pos];
                }
                ++$char_pos;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_pos > $mask_length) {
                    break 2;
                }
            }

            // A '#' that replaced an original break or a space consumes that character;
            // a '#' inserted by a forced cut consumes nothing.
            if (
                $chars[$char_pos] === $break
                ||
                $chars[$char_pos] === ' '
            ) {
                ++$char_pos;
            }

            $result .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // append whatever follows the last break
        return $result . \implode('', \array_slice($chars, $char_pos));
    }
13079
13080
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that we wrap the per line.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     The wrapped parts are joined with this delimiter again ("\n" if null).
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // without a delimiter every kind of newline separates the lines
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        return \implode($delimiter ?? "\n", $wrapped_lines) . ($add_final_break ? $break : '');
    }
13133
13134
    /**
     * Returns the class-level list of Unicode White Space characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        // plain accessor for the static lookup table
        return self::$WHITESPACE;
    }
13146
13147 103
    /**
     * Convert an UTF-8 encoded string to a single-byte string suitable for
     * functions such as levenshtein.
     *
     * The function simply uses (and updates) a tailored dynamic encoding
     * (in/out map parameter) where non-ascii characters are remapped to
     * the range [128-255] in order of appearance.
     *
     * Thus it supports up to 128 different multibyte code points max over
     * the whole set of strings sharing this encoding.
     *
     * @param  string $str  UTF-8 string to be converted to extended ASCII (modified in place).
     * @param  array  $map  Reference of the map (multi-byte sequence => replacement byte), extended in place.
     *
     * @return void
     */
    private static function convertMbAscii(string &$str, array &$map)
    {
        // collect every multi-byte sequence: one lead byte followed by continuation bytes
        $found = [];
        if (! \preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $found)) {
            return; // plain ascii string
        }

        // assign the next free single byte to each sequence that is not mapped yet
        $next_byte = 128 + \count($map);
        foreach ($found[0] as $sequence) {
            if (isset($map[$sequence])) {
                continue;
            }

            $map[$sequence] = \chr($next_byte);
            ++$next_byte;
        }

        // finally remap non-ascii characters
        $str = \strtr($str, $map);
    }
13183 2
13184
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary looking input that is detected as UTF-16 / UTF-32 is rejected
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        // --- manual fallback: byte-wise state machine ---

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $pending = 0;         // continuation octets still expected for the current sequence
        $code_point = 0;      // code point assembled so far
        $sequence_length = 1; // total number of octets of the current sequence

        $byte_count = \strlen($str);
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $byte = self::$ORD[$str[$pos]];

            if ($pending !== 0) {
                // Inside a multi-octet sequence: only a continuation octet (10xxxxxx) is legal.
                if (($byte & 0xC0) !== 0x80) {
                    // Incomplete multi-octet sequence.
                    return false;
                }

                $code_point |= ($byte & 0x0000003F) << (($pending - 1) * 6);

                if (--$pending !== 0) {
                    continue;
                }

                // End of the multi-octet sequence: check for illegal sequences and code points.
                if (
                    // From Unicode 3.1, non-shortest form is illegal
                    ($sequence_length === 2 && $code_point < 0x0080)
                    ||
                    ($sequence_length === 3 && $code_point < 0x0800)
                    ||
                    ($sequence_length === 4 && $code_point < 0x10000)
                    ||
                    ($sequence_length > 4)
                    ||
                    // From Unicode 3.2, surrogate characters are illegal.
                    (($code_point & 0xFFFFF800) === 0xD800)
                    ||
                    // Code points outside the Unicode range are illegal.
                    ($code_point > 0x10FFFF)
                ) {
                    return false;
                }

                // reset for the next character
                $code_point = 0;
                $sequence_length = 1;

                continue;
            }

            // No sequence in progress: expect either US-ASCII or the first octet of a sequence.
            if (($byte & 0x80) === 0) {
                // US-ASCII, pass straight through.
                $sequence_length = 1;
            } elseif (($byte & 0xE0) === 0xC0) {
                // First octet of 2 octet sequence.
                $code_point = ($byte & 0x1F) << 6;
                $pending = 1;
                $sequence_length = 2;
            } elseif (($byte & 0xF0) === 0xE0) {
                // First octet of 3 octet sequence.
                $code_point = ($byte & 0x0F) << 12;
                $pending = 2;
                $sequence_length = 3;
            } elseif (($byte & 0xF8) === 0xF0) {
                // First octet of 4 octet sequence.
                $code_point = ($byte & 0x07) << 18;
                $pending = 3;
                $sequence_length = 4;
            } elseif (($byte & 0xFC) === 0xF8) {
                // First octet of 5 octet sequence.
                //
                // This is illegal because the encoded codepoint must be either
                // (a) not the shortest form or
                // (b) outside the Unicode range of 0-0x10FFFF.
                // Rather than trying to resynchronize, we carry on until the end
                // of the sequence and let the check above reject it.
                $code_point = ($byte & 0x03) << 24;
                $pending = 4;
                $sequence_length = 5;
            } elseif (($byte & 0xFE) === 0xFC) {
                // First octet of 6 octet sequence, see comments for 5 octet sequence.
                $code_point = ($byte & 1) << 30;
                $pending = 5;
                $sequence_length = 6;
            } else {
                // Current octet is neither in the US-ASCII range nor a legal first
                // octet of a multi-octet sequence.
                return false;
            }
        }

        // a sequence that is still open at the end of the input is invalid
        return $pending === 0;
    }
13338
13339
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * The common table (self::$COMMON_CASE_FOLD) is always applied; the full table
     * (data file "caseFolding_full", loaded once and cached) only on request.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;

        $common = self::$COMMON_CASE_FOLD;

        // common cases: swap the search / replace tables depending on the direction
        $str = $use_lowercase
            ? \str_replace($common['upper'], $common['lower'], $str)
            : \str_replace($common['lower'], $common['upper'], $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        // lazy-load the full table on first use
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        return $use_lowercase
            ? \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str)
            : \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
13392
13393
    /**
     * Loads a data table by including "/data/<file>.php" (relative to this class file)
     * and returning whatever that file returns.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
13411
13412
    /**
     * Builds the emoji lookup caches on first use.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function initEmojiData()
    {
        // already initialised -> nothing to do
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // order the table by key byte length, longest keys first
        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // one placeholder per key, built from the key's crc32 and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
13449
13450
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>true only if MB_OVERLOAD_STRING is defined and set in the "mbstring.func_overload" INI value.</p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant there is nothing that could be overloaded
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection DeprecatedIniOptionsInspection */
        $overload_flags = (int) @\ini_get('mbstring.func_overload');

        return ($overload_flags & \MB_OVERLOAD_STRING) !== 0;
    }
13470
13471 36
    /**
     * Filters a list of strings and returns the remaining values as a re-indexed list.
     *
     * @param array    $strings
     * @param bool     $remove_empty_values <p>Drop values that are empty after trim().</p>
     * @param int|null $remove_short_values <p>Drop values whose mb_strlen() is less than or equal to this
     *                                      number; null disables the check.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        $kept = \array_filter(
            $strings,
            static function ($value) use ($remove_empty_values, $remove_short_values): bool {
                // too short?
                if ($remove_short_values !== null && \mb_strlen($value) <= $remove_short_values) {
                    return false;
                }

                // keep unless it is whitespace-only and empty values should go
                return !($remove_empty_values && \trim($value) === '');
            }
        );

        // array_filter() preserves keys, the result has to be a plain list
        return \array_values($kept);
    }
13512
13513
    /**
     * Builds a regular expression fragment that matches any of the characters of $s.
     *
     * Characters are collected into one character class (seeded with $class); entries that
     * cannot go into the class are added as alternatives of a non-capturing group.
     * Results are cached per ($s, $class) combination.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class [optional] <p>Initial content of the character class (without brackets).</p>
     *
     * @return string
     *
     * @psalm-pure
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 holds the character class content, further entries are alternatives
        $class_array = [$class];

        // iterate by value with its own variable: nothing is modified in place and
        // the parameter $s must not be overwritten by the loop
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a leading "-" is taken literally inside a character class
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: escape regex meta characters
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one (longer) multi-byte character
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        // explicit '' comparison, so that a class consisting of "0" gets its brackets as well
        if ($class_array[0] !== '') {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
13568
13569
    /**
     * Capitalize the parts of a personal name that is split by one delimiter.
     *
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly
     * using lowercase ("marcus aurelius"). Each piece between delimiters gets
     * its first character upper-cased via self::ucfirst(), except when:
     * - the piece is exactly one of the lowercase name particles ("von", "de", ...),
     * - the piece starts with one of the known prefixes ("mc", "d'", ...), or
     * - the delimiter is "-" and the piece starts with one of the name particles.
     *
     * All comparisons are case-sensitive byte comparisons.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The delimiter to split on and re-join with.</p>
     * @param string $encoding  <p>Encoding handed to self::ucfirst().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The processed name(s), or an empty string if $delimiter is empty.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() cannot split on an empty delimiter (warning + false on
        // PHP 7, ValueError on PHP 8), so keep the historical '' result explicitly.
        if ($delimiter === '') {
            return '';
        }

        $name_helper_array = \explode($delimiter, $names);

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a piece untouched. With "-" as delimiter the
        // name particles count as beginnings too. This does not depend on the
        // current piece, so it is built once up front.
        $beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $beginnings = \array_merge($special_cases['names'], $special_cases['prefixes']);
        }

        foreach ($name_helper_array as &$name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            foreach ($beginnings as $beginning) {
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    // Leave this piece as it is and move on to the next one.
                    continue 2;
                }
            }

            $name = self::ucfirst($name, $encoding);
        }
        // Drop the reference so later writes to $name cannot alter the array.
        unset($name);

        return \implode($delimiter, $name_helper_array);
    }
13669
13670
    /**
     * Fold a string for collation matching by removing combining marks.
     *
     * The input is decomposed to Unicode NFD and every run of non-spacing
     * marks (\p{Mn}) is stripped. Letter case is left untouched.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The folded string, an empty string if the input could not be
     *                     normalized, or null if the regex replacement fails.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports failure with false.
        return $decomposed === false
            ? ''
            : \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
13694
13695
    /**
     * Convert one input character to its UTF-8 representation.
     *
     * The ordinal of $input is looked up in the "ord" table. If the
     * "win1252_to_utf8" table has an entry for that ordinal, the entry is
     * returned. Otherwise a two-byte sequence is assembled: a lead byte from
     * the "chr" table entry for (ordinal / 64) OR-ed with 0xC0, followed by
     * the low six bits of $input OR-ed with 0x80.
     *
     * NOTE(review): presumably $input is a single byte that exists as a key in
     * the "ord" table, and the "chr" table yields one-byte strings — confirm
     * against the callers and the data files.
     *
     * @param int|string $input
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // Load the lookup tables on first use.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        // found in Windows-1252 special cases
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
            return (string) self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Truncate explicitly: a fractional float used as an array key is
        // deprecated since PHP 8.1.
        /** @noinspection OffsetOperationsInspection */
        $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";

        // Byte-wise string operations: keep the low six bits, then set the
        // continuation-byte marker.
        $cc2 = (((string) $input) & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
13733
13734
    /**
     * Rewrite non-standard "%uXXXX" URL escapes as hexadecimal HTML entities.
     *
     * Every "%u" followed by three or four hex digits becomes "&#xXXXX;".
     * Strings without such an escape are returned unchanged.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $unicode_escape = '/%u([0-9a-fA-F]{3,4})/';

        // Cheap substring test first; the regex only runs when "%u" is present.
        $has_escape = \strpos($str, '%u') !== false
                      && \preg_match($unicode_escape, $str);

        if (!$has_escape) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape, '&#x\\1;', $str);
    }
13756
}
13757