Passed
Push — master ( 37259a...5530eb )
by Lars
03:50 queued 33s
created

UTF8   F

Complexity

Total Complexity 1745

Size/Duplication

Total Lines 13664
Duplicated Lines 0 %

Test Coverage

Coverage 80.93%

Importance

Changes 108
Bugs 51 Features 6
Metric Value
eloc 4222
c 108
b 51
f 6
dl 0
loc 13664
ccs 3098
cts 3828
cp 0.8093
rs 0.8
wmc 1745

272 Methods

Rating   Name   Duplication   Size   Complexity  
A words_limit() 0 20 5
D normalize_encoding() 0 147 16
A normalize_line_ending() 0 3 1
A titlecase() 0 35 5
A is_ascii() 0 3 1
A tabs_to_spaces() 0 11 3
A symfony_polyfill_used() 0 16 5
A is_html() 0 14 2
A file_has_bom() 0 8 2
B file_get_contents() 0 56 11
A filter_input() 0 16 3
A encode_mimeheader() 0 26 5
A filter_var_array() 0 15 2
F extract_text() 0 175 34
A filter_var() 0 15 2
A filter_input_array() 0 15 3
B chr_to_decimal() 0 38 8
A add_bom_to_string() 0 7 2
A count_chars() 0 11 1
A ctype_loaded() 0 3 1
D chr() 0 107 19
A chunk_split() 0 3 1
A css_identifier() 0 55 6
A css_stripe_media_queries() 0 6 1
A clean() 0 47 6
A __construct() 0 2 1
B between() 0 48 8
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A char_at() 0 7 2
A chars() 0 4 1
A chr_size_list() 0 17 3
A checkForSupport() 0 46 4
A collapse_whitespace() 0 7 2
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A array_change_key_case() 0 23 5
A str_substr_after_first_separator() 0 28 6
A max() 0 14 3
B str_camelize() 0 74 10
A parse_str() 0 18 4
A str_contains() 0 15 3
B str_to_lines() 0 28 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A is_bom() 0 10 3
A is_hexadecimal() 0 7 2
A get_unique_string() 0 21 3
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 25 6
A has_uppercase() 0 7 2
A remove_left() 0 28 4
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
C utf8_decode() 0 59 13
A ltrim() 0 26 5
A emoji_decode() 0 21 3
A is_utf8() 0 13 4
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 70 14
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A html_escape() 0 6 1
A string() 0 18 4
B str_obfuscate() 0 47 8
B rxClass() 0 44 8
B get_file_type() 0 60 7
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 170 7
D is_utf16() 0 76 17
C filter() 0 59 14
A normalize_whitespace() 0 11 1
A str_starts_with() 0 16 4
A str_humanize() 0 15 1
A decode_mimeheader() 0 8 3
C substr_count_in_byte() 0 55 15
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 26 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A getData() 0 6 1
B strtolower() 0 58 10
A urldecode() 0 35 4
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 123 27
A strstr_in_byte() 0 15 4
A emoji_encode() 0 21 3
A str_matches_pattern() 0 3 1
A is_alpha() 0 7 2
C str_titleize() 0 69 12
A str_split_array() 0 17 2
B get_random_string() 0 54 10
A ws() 0 3 1
A str_replace_first() 0 20 2
A fix_utf8() 0 30 4
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A first_char() 0 14 4
A to_boolean() 0 35 5
C stristr() 0 79 17
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A trim() 0 26 5
A is_serialized() 0 11 3
A str_upper_camelize() 0 8 1
A is_uppercase() 0 7 2
A substr_compare() 0 33 6
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 12 3
A str_ireplace() 0 31 5
A str_replace_ending() 0 24 6
A string_has_bom() 0 9 3
B strtr() 0 41 11
B str_contains_all() 0 22 9
A str_isubstr_after_last_separator() 0 26 5
D range() 0 71 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A rawurldecode() 0 35 4
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 14 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 14
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A is_blank() 0 7 2
A str_replace() 0 18 1
A substr_iright() 0 15 4
D getCharDirection() 0 104 117
A htmlspecialchars() 0 15 3
A replace() 0 11 2
A decimal_to_chr() 0 5 1
A to_iso8859() 0 16 4
A has_whitespace() 0 7 2
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 134 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
F strrpos() 0 136 31
A remove_right() 0 25 4
A remove_html_breaks() 0 3 1
A showSupport() 0 16 3
A remove_invisible_characters() 0 11 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_substr_before_last_separator() 0 31 6
B is_binary() 0 39 10
A intlChar_loaded() 0 3 1
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
B is_url() 0 40 7
A finfo_loaded() 0 3 1
B str_truncate() 0 43 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 16 4
A fits_inside() 0 3 1
A to_ascii() 0 6 1
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 12 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A html_stripe_empty_tags() 0 6 1
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
A json_loaded() 0 3 1
B str_snakeize() 0 57 6
A is_lowercase() 0 7 2
A str_sort() 0 16 3
A to_utf8() 0 15 3
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 51 11
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A is_empty() 0 3 1
B html_encode() 0 54 11
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
F encode() 0 144 37
C is_utf32() 0 71 16
C ord() 0 68 16
B to_string() 0 27 8
A is_alphanumeric() 0 7 2
A strtonatfold() 0 11 2
A json_decode() 0 13 2
C strcspn() 0 48 12
A fix_simple_utf8() 0 32 5
B is_json() 0 26 8
A is_printable() 0 3 1
A fixStrCaseHelper() 0 41 5
A int_to_hex() 0 7 2
C str_split_pattern() 0 54 13
D strstr() 0 107 21
A has_lowercase() 0 7 2
A json_encode() 0 9 2
A str_isubstr_first() 0 25 4
A is_base64() 0 17 5
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A hex_to_int() 0 14 3
A hex_to_chr() 0 4 1
A htmlentities() 0 28 3
A str_substr_before_first_separator() 0 32 6
F substr() 0 137 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A getSupportInfo() 0 13 3
A replace_diamond_question_mark() 0 43 5
A chr_to_hex() 0 11 3
D is_utf8_string() 0 133 28
B str_delimit() 0 31 8
A to_utf8_convert_helper() 0 28 5
B strtoupper() 0 58 10
A min() 0 14 3
A is_punctuation() 0 3 1
C html_entity_decode() 0 58 13
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 28 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 151 33
A str_shuffle() 0 33 6
A strcmp() 0 11 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
B str_to_words() 0 36 8
A emoji_from_country_code() 0 17 3

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
     * Known byte order marks => their length in bytes.
     *
     * Every BOM is listed twice: once as raw bytes and once in the form it
     * takes after being misread as "WINDOWS-1252" and stored as UTF-8 text
     * (which is why those variants are longer than the raw BOM).
     *
     * The "WINDOWS-1252" variant of the UTF-8 BOM is written with explicit
     * byte escapes ("ï»¿" as UTF-8) so that tools which strip BOM-like
     * characters cannot silently turn the key into an empty string.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
31
32
    /**
33
     * Numeric code point => UTF-8 Character
34
     *
35
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
36
     *
37
     * @var array<int, string>
38
     */
39
    private static $WHITESPACE = [
40
        // NULL Byte
41
        0 => "\x0",
42
        // Tab
43
        9 => "\x9",
44
        // New Line
45
        10 => "\xa",
46
        // Vertical Tab
47
        11 => "\xb",
48
        // Carriage Return
49
        13 => "\xd",
50
        // Ordinary Space
51
        32 => "\x20",
52
        // NO-BREAK SPACE
53
        160 => "\xc2\xa0",
54
        // OGHAM SPACE MARK
55
        5760 => "\xe1\x9a\x80",
56
        // MONGOLIAN VOWEL SEPARATOR
57
        6158 => "\xe1\xa0\x8e",
58
        // EN QUAD
59
        8192 => "\xe2\x80\x80",
60
        // EM QUAD
61
        8193 => "\xe2\x80\x81",
62
        // EN SPACE
63
        8194 => "\xe2\x80\x82",
64
        // EM SPACE
65
        8195 => "\xe2\x80\x83",
66
        // THREE-PER-EM SPACE
67
        8196 => "\xe2\x80\x84",
68
        // FOUR-PER-EM SPACE
69
        8197 => "\xe2\x80\x85",
70
        // SIX-PER-EM SPACE
71
        8198 => "\xe2\x80\x86",
72
        // FIGURE SPACE
73
        8199 => "\xe2\x80\x87",
74
        // PUNCTUATION SPACE
75
        8200 => "\xe2\x80\x88",
76
        // THIN SPACE
77
        8201 => "\xe2\x80\x89",
78
        // HAIR SPACE
79
        8202 => "\xe2\x80\x8a",
80
        // LINE SEPARATOR
81
        8232 => "\xe2\x80\xa8",
82
        // PARAGRAPH SEPARATOR
83
        8233 => "\xe2\x80\xa9",
84
        // NARROW NO-BREAK SPACE
85
        8239 => "\xe2\x80\xaf",
86
        // MEDIUM MATHEMATICAL SPACE
87
        8287 => "\xe2\x81\x9f",
88
        // HALFWIDTH HANGUL FILLER
89
        65440 => "\xef\xbe\xa0",
90
        // IDEOGRAPHIC SPACE
91
        12288 => "\xe3\x80\x80",
92
    ];
93
94
    /**
95
     * @var array<string, string>
96
     */
97
    private static $WHITESPACE_TABLE = [
98
        'SPACE'                     => "\x20",
99
        'NO-BREAK SPACE'            => "\xc2\xa0",
100
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
101
        'EN QUAD'                   => "\xe2\x80\x80",
102
        'EM QUAD'                   => "\xe2\x80\x81",
103
        'EN SPACE'                  => "\xe2\x80\x82",
104
        'EM SPACE'                  => "\xe2\x80\x83",
105
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
106
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
107
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
108
        'FIGURE SPACE'              => "\xe2\x80\x87",
109
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
110
        'THIN SPACE'                => "\xe2\x80\x89",
111
        'HAIR SPACE'                => "\xe2\x80\x8a",
112
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
113
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
114
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
115
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
116
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
117
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
118
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
119
    ];
120
121
    /**
122
     * @var array
123
     *
124
     * @phpstan-var array{upper: string[], lower: string[]}
125
     */
126
    private static $COMMON_CASE_FOLD = [
127
        'upper' => [
128
            'µ',
129
            'ſ',
130
            "\xCD\x85",
131
            'ς',
132
            'ẞ',
133
            "\xCF\x90",
134
            "\xCF\x91",
135
            "\xCF\x95",
136
            "\xCF\x96",
137
            "\xCF\xB0",
138
            "\xCF\xB1",
139
            "\xCF\xB5",
140
            "\xE1\xBA\x9B",
141
            "\xE1\xBE\xBE",
142
        ],
143
        'lower' => [
144
            'μ',
145
            's',
146
            'ι',
147
            'σ',
148
            'ß',
149
            'β',
150
            'θ',
151
            'φ',
152
            'π',
153
            'κ',
154
            'ρ',
155
            'ε',
156
            "\xE1\xB9\xA1",
157
            'ι',
158
        ],
159
    ];
160
161
    /**
162
     * @var array
163
     *
164
     * @phpstan-var array<string, mixed>
165
     */
166
    private static $SUPPORT = [];
167
168
    /**
169
     * @var string[]|null
170
     *
171
     * @phpstan-var array<string, string>|null
172
     */
173
    private static $BROKEN_UTF8_FIX;
174
175
    /**
176
     * @var string[]|null
177
     *
178
     * @phpstan-var array<int, string>|null
179
     */
180
    private static $WIN1252_TO_UTF8;
181
182
    /**
183
     * @var string[]|null
184
     *
185
     * @phpstan-var array<int ,string>|null
186
     */
187
    private static $INTL_TRANSLITERATOR_LIST;
188
189
    /**
190
     * @var string[]|null
191
     *
192
     * @phpstan-var array<string>|null
193
     */
194
    private static $ENCODINGS;
195
196
    /**
197
     * @var int[]|null
198
     *
199
     * @phpstan-var array<string ,int>|null
200
     */
201
    private static $ORD;
202
203
    /**
204
     * @var string[]|null
205
     *
206
     * @phpstan-var array<string, string>|null
207
     */
208
    private static $EMOJI;
209
210
    /**
211
     * @var string[]|null
212
     *
213
     * @phpstan-var array<string>|null
214
     */
215
    private static $EMOJI_VALUES_CACHE;
216
217
    /**
218
     * @var string[]|null
219
     *
220
     * @phpstan-var array<string>|null
221
     */
222
    private static $EMOJI_KEYS_CACHE;
223
224
    /**
225
     * @var string[]|null
226
     *
227
     * @phpstan-var array<string>|null
228
     */
229
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
230
231
    /**
232
     * @var string[]|null
233
     *
234
     * @phpstan-var array<int, string>|null
235
     */
236
    private static $CHR;
237
238
    /**
     * Creates an instance; no state is initialised here.
     */
    public function __construct()
    {
        // intentionally empty
    }
244
245
    /**
     * Fetch a single character by its position, similar to $str[1] but multi-byte aware.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * An empty input string or a negative position yields an empty string.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $nothing_to_read = $pos < 0 || $str === '';
        if ($nothing_to_read) {
            return '';
        }

        // every non-UTF-8 charset goes through the encoding-aware helper
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $pos, 1, $encoding);
        }

        return (string) \mb_substr($str, $pos, 1);
    }
271
272
    /**
     * Makes sure the string starts with a BOM by prepending the UTF-8 BOM when needed.
     *
     * INFO: A string for which UTF8::string_has_bom() reports a BOM is returned untouched.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The output string that contains BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        return self::string_has_bom($str)
            ? $str
            : (self::bom() . $str);
    }
294
295
    /**
     * Changes the case of all keys in an array (multi-byte aware).
     *
     * Values are copied unchanged. If two keys become identical after the
     * case change, the later entry overwrites the earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default); any other
     *                                       value is treated as <strong>CASE_LOWER</strong></p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>
     *                              <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // everything that is not explicitly CASE_UPPER falls back to CASE_LOWER
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // iterate by value: nothing is written back, so no reference is needed
        // (and none is left dangling after the loop)
        foreach ($array as $key => $value) {
            // PHP turns numeric keys into integers; the case helpers work on strings
            $key = (string) $key;

            $key = $to_upper
                ? self::strtoupper($key, $encoding)
                : self::strtolower($key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
332
333
    /**
     * Extracts the text enclosed by the first $start delimiter (searched from
     * $offset) and the next $end delimiter after it.
     *
     * An empty string is returned when either delimiter is not found or when
     * there is nothing between the two delimiters.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: go through the encoding-aware helpers
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $opening_at = self::strpos($str, $start, $offset, $encoding);
            if ($opening_at === false) {
                return '';
            }

            $content_begin = $opening_at + (int) self::strlen($start, $encoding);
            $closing_at = self::strpos($str, $end, $content_begin, $encoding);
            if ($closing_at === false || $closing_at === $content_begin) {
                return '';
            }

            return (string) self::substr(
                $str,
                $content_begin,
                $closing_at - $content_begin,
                $encoding
            );
        }

        // UTF-8 fast path: call the "mb_" functions directly
        $opening_at = \mb_strpos($str, $start, $offset);
        if ($opening_at === false) {
            return '';
        }

        $content_begin = $opening_at + (int) \mb_strlen($start);
        $closing_at = \mb_strpos($str, $end, $content_begin);
        if ($closing_at === false || $closing_at === $content_begin) {
            return '';
        }

        return (string) \mb_substr($str, $content_begin, $closing_at - $content_begin);
    }
398
399
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * The input is read as one big binary number: leading zeros carry no
     * information and the digits are aligned to byte boundaries from the right.
     * The conversion works byte by byte, so it neither loses precision on long
     * inputs (as a float-based base conversion does) nor shifts nibbles when
     * the number of significant bits is not a multiple of eight.
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded bytes, or an empty string for empty, non-string or all-zero input.</p>
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // ignore everything that is not a binary digit, then drop leading zeros
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to a whole number of bytes
        $remainder = \strlen($bits) % 8;
        if ($remainder !== 0) {
            $bits = \str_repeat('0', 8 - $remainder) . $bits;
        }

        $str = '';
        foreach (\str_split($bits, 8) as $byte_bits) {
            $str .= \chr((int) \bindec($byte_bits));
        }

        return $str;
    }
425
426
    /**
     * Provides the three-byte UTF-8 Byte Order Mark.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark.</p>
     */
    public static function bom(): string
    {
        $utf8_bom = "\xEF\xBB\xBF";

        return $utf8_bom;
    }
442
443
    /**
     * Alias: forwards both arguments unchanged to UTF8::chr_map().
     *
     * @alias of UTF8::chr_map()
     *
     * @param callable $callback
     * @param string   $str
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
459
460
    /**
     * Picks the single character located at $index (zero-based).
     *
     * The index is handed to the substring function as-is, so it is
     * interpreted by that function's own offset rules.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $index, 1, $encoding);
        }

        // UTF-8 fast path
        return (string) \mb_substr($str, $index, 1);
    }
480
481
    /**
482
     * Returns an array consisting of the characters in the string.
483
     *
484
     * @param string $str <p>The input string.</p>
485
     *
486
     * @psalm-pure
487
     *
488
     * @return string[]
489
     *                  <p>An array of chars.</p>
490
     */
491 4
    public static function chars(string $str): array
492
    {
493
        /** @var string[] */
494 4
        return self::str_split($str);
0 ignored issues
show
Bug Best Practice introduced by
The expression return self::str_split($str) returns an array which contains values of type string[] which are incompatible with the documented value type string.
Loading history...
495
    }
496
497
    /**
     * Detects once which UTF-8 related PHP features are available and stores
     * the findings in UTF8::$SUPPORT.
     *
     * When mbstring (or the symfony polyfill) is detected, the mbstring
     * internal encoding is switched to UTF-8 as a side effect.
     *
     * @return true|null
     *                   <p>true when the detection was performed by this call,
     *                   null when it had already been done before.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // the detection runs at most once
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();

        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
551
552
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * Results are memoized per code point and encoding in a function-static
     * cache. Only code points below U+0080 are served from the single-byte
     * lookup table; U+0080 itself already needs two bytes in UTF-8 and is
     * therefore built like every other multi-byte character.
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null on failure or empty input
     *                     (i.e. for non-integer or non-positive code points).</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // the parameter is not type-hinted, so the is_int() guard is needed at runtime
        if (!\is_int($code_point) || $code_point <= 0) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // only for "simple"-chars: U+0001 - U+007F are a single byte in UTF-8
        if ($code_point < 0x80) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // the table is indexed by byte value, so it is used here to emit the
        // lead byte and the continuation bytes of the UTF-8 sequence
        if ($code_point <= 0x7FF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
675
676
    /**
     * Runs the callback once for every character of the string and collects the results.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
696
697
    /**
     * Lists, for every character of a Unicode string, how many bytes it occupies.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An array of byte lengths of each character.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byte_counter = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_counter = '\strlen';
        }

        return \array_map($byte_counter, self::str_split($str));
    }
732
733
    /**
     * Determine the decimal code point of a single UTF-8 character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * With iconv available the character is converted to UCS-4LE and unpacked;
     * otherwise (or when that conversion fails) the bytes are decoded manually
     * from the lead byte and its continuation bytes.
     *
     * @param string $char <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        $ucs4 = self::$SUPPORT['iconv'] === true
            ? \iconv('UTF-8', 'UCS-4LE', $char)
            : false;
        if ($ucs4 !== false) {
            /** @phpstan-ignore-next-line - "unpack": only false if the format string contains errors */
            return \unpack('V', $ucs4)[1];
        }

        $lead = self::ord($char[0]);

        if (($lead & 0x80) === 0) {
            // 0xxxxxxx
            return $lead;
        }

        $length = 1;
        $code_point = $lead;

        if (($lead & 0xe0) === 0xc0) {
            // 110xxxxx
            $length = 2;
            $code_point = $lead & ~0xc0;
        } elseif (($lead & 0xf0) === 0xe0) {
            // 1110xxxx
            $length = 3;
            $code_point = $lead & ~0xe0;
        } elseif (($lead & 0xf8) === 0xf0) {
            // 11110xxx
            $length = 4;
            $code_point = $lead & ~0xf0;
        }

        // append the payload bits of every continuation byte (10xxxxxx)
        for ($offset = 1; $offset < $length; ++$offset) {
            $code_point = ($code_point << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $code_point;
    }
785
786
    /**
     * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional] <p>The prefix handed over to UTF8::int_to_hex().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // The HTML entity of the NUL byte is processed like an empty character.
        $character = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($character), $prefix);
    }
811
812
    /**
     * Splits a string into smaller chunks and joins them with the specified line ending character(s).
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
830
831
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        the UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
     *                                                        Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
     *                                                        in combination with $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove the
     *                                                        diamond question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to
     *                                                        remove invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove
     *                                                        url encoded invisible characters e.g.: "%0B"<br>
     *                                                        WARNING: maybe contains false-positives e.g.
     *                                                        aa%0Baa -> aaaa.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 encoded string.</p>
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences, everything else is dropped
        // because only "$1" is written back.
        $utf8_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($utf8_pattern, '$1', $str);

        // The optional steps below run in a fixed order.
        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
910
911
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The cleaned string, or an empty string for an empty input.</p>
     */
    public static function cleanup($str): string
    {
        // init
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        return self::clean(
            $fixed, // remove all none UTF-8 symbols
            true,   // remove BOM
            true,   // normalize whitespace chars ...
            false,  // (no MS Word normalization)
            true,   // ... but keep non-breaking-spaces
            true    // remove diamond question mark (�)
            // invisible characters (e.g. "\0") are removed by the default of clean()
        );
    }
948
949
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
     *                                     default, code points will be returned as integers.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A plain string is split into its characters, arrays are used as they are.
        if (\is_string($arg)) {
            $arg = self::str_split($arg);
        }

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($arg) || $arg === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $arg);

        if (!$use_u_style) {
            return $code_points;
        }

        // Convert every integer code point into its "U+xxxx" notation.
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1010
1011
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        $whitespace_pattern = '[[:space:]]+';

        if (self::$SUPPORT['mbstring'] === true) {
            $condensed = (string) \mb_ereg_replace($whitespace_pattern, ' ', $str);
        } else {
            // Without mbstring the replacement is done by the regex helper of this class.
            $condensed = self::regex_replace($str, $whitespace_pattern, ' ');
        }

        return \trim($condensed);
    }
1031
1032
    /**
     * Returns count of characters used in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                        "mb_" functions (handed over to UTF8::str_split()).</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split into single characters (chunk length 1) ...
        $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // ... and count how often each of them occurs.
        return \array_count_values($characters);
    }
1061
1062
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * Copied from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter     <p>Replacement map (search => replacement) that is applied to the string.</p>
     * @param bool     $strip_tags <p>Set to true, if HTML / PHP tags should be stripped first.</p>
     * @param bool     $strtolower <p>Set to false, if the string should not be lowercased.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @phpstan-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback ...
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $placeholder_count = 0;
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $placeholder_count);
        }

        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Replace temporary placeholder '##' with '__' only if the original
        // string contained '__'.
        if ($placeholder_count > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Valid characters in a CSS identifier are:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 and higher
        // We strip out any character not in the above list.
        $str = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $str);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $invalid_starts = ['/^[0-9]/', '/^(-[0-9])|^(--)/'];
        $replacements = ['_', '__'];
        $str = (string) \preg_replace($invalid_starts, $replacements, $str);

        return \trim($str, '-');
    }
1136
1137
    /**
     * Remove css media-queries.
     *
     * @param string $str <p>The CSS source.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The CSS without the matched "@media" blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // Matches an "@media" rule including its nested rule blocks.
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        return (string) $without_media_queries;
    }
1154
1155
    /**
     * Checks whether the "ctype" extension is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1169
1170
    /**
     * Converts an int value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_decimal()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int <p>The decimal code point.</p>
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 character, or an empty string if the conversion did not return a string.</p>
     */
    public static function decimal_to_chr($int): string
    {
        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        $char = \mb_convert_encoding('&#' . $int . ';', 'UTF-8', 'HTML-ENTITIES');

        // "mb_convert_encoding()" is declared as string|array|false, so make sure
        // that nothing but a string leaves this method.
        return \is_string($char) ? $char : '';
    }
1191
1192
    /**
     * Decodes a MIME header field.
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // The two common encodings are used as they are, everything else gets normalized.
        $is_common_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_common_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1213
1214
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE (upper or lower case)</p>
     *
     * @return string
     *                <p>Emoji or empty string on error, e.g. if the input does not
     *                consist of exactly two ASCII letters.</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only the letters A-Z can be mapped onto the regional indicator symbols,
        // and the single bytes are read below, so reject everything else
        // (empty strings, digits, multibyte characters, wrong length).
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        // U+1F1E6 is the regional indicator symbol for "A", 0x41 is the ASCII code of "A".
        $flagOffset = 0x1F1E6;
        $asciiOffset = 0x41;

        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1242
1243
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the key list that was used while encoding ...
        $encoded_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        // ... and replace the keys with the emoji characters again.
        return (string) \str_replace($encoded_keys, (array) self::$EMOJI_VALUES_CACHE, $str);
    }
1286
1287
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Choose the key list the emoji characters are replaced with.
        $replacement_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace((array) self::$EMOJI_VALUES_CACHE, $replacement_keys, $str);
    }
1330
1331
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the current encoding of the string is
     *                                              auto-detected and replaces $from_encoding; if the detection
     *                                              fails, the string is converted via UTF8::to_utf8().<br>
     *                                              When false, $from_encoding is used and the detection only
     *                                              runs if $from_encoding is empty.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              An empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // Normalize the encoding names, the two common ones are used as they are.
        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Nothing to convert if source and target are the same.
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // Pseudo encodings: JSON
        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        // Pseudo encodings: BASE64
        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        // Pseudo encodings: HTML-ENTITIES
        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        $detected_encoding = false;
        if ($auto_detect_the_from_encoding || !$from_encoding) {
            $detected_encoding = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding !== false) {
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // Conversions that are handled by the helpers of this class.
        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $mb_converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            if ($mb_converted) {
                \assert(\is_string($mb_converted));

                return $mb_converted;
            }
        }

        // Last resort: "iconv", the unconverted string is returned if that fails too.
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $iconv_converted = @\iconv($from_encoding, $to_encoding, $str);
        if ($iconv_converted !== false) {
            return $iconv_converted;
        }

        return $str;
    }
1506
1507
    /**
     * Encodes a string as a MIME header field (the field name is left empty).
     *
     * @param string $str               <p>The value to encode.</p>
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
     * @param int    $indent            [optional] <p>Set the max line length.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // The two common charsets are used as they are, everything else gets normalized.
        if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $options = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $options);
    }
1550
1551
    /**
     * Creates an extract of a text. If a search-string is given and found, the extract is
     * positioned around it; skipped text is marked with the replacer string.
     *
     * Cut positions are always taken at a space or a dot.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        $length = $length ?? (int) \round((int) self::strlen($str, $encoding) / 2);

        // Position of the next space / dot at or after $offset.
        // A "false" from either lookup collapses the result to 0 after the int-cast.
        $next_break = static function (int $offset) use ($str, $is_utf8, $encoding): int {
            if ($is_utf8) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        // Substring of the input, via "mb_substr()" for UTF-8 and via self::substr() otherwise.
        $slice = static function (int $start, int $len) use ($str, $is_utf8, $encoding) {
            return $is_utf8
                ? \mb_substr($str, $start, $len)
                : self::substr($str, $start, $len, $encoding);
        };

        // ---- no search-string: cut the text at the first break after "$length - 1" chars
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $string_length = $is_utf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);
                $end = ($length - 1) <= $string_length ? ($length - 1) : $string_length;
            }

            $pos = $next_break($end);
            if (!$pos) {
                return $str;
            }

            $str_sub = $slice(0, $pos);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // ---- with search-string: try to place the match in the middle of the extract
        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $search_length = (int) \mb_strlen($search);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $search_length = (int) self::strlen($search, $encoding);
        }
        $half_side = (int) ($word_position - $length / 2 + $search_length / 2);

        // start the extract at the last break before the calculated left border
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $slice(0, $half_side);
            if ($half_text !== false) {
                $pos_start = $is_utf8
                    ? (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    )
                    : (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
            }
        }

        $total_length = (int) self::strlen($str, $encoding);

        if ($word_position && $half_side > 0) {
            $offset = $pos_start + $length - 1;
            if ($offset > $total_length) {
                $offset = $total_length;
            }

            $pos_end = $next_break($offset) - $pos_start;

            if ($pos_end <= 0) {
                // no break behind the match: keep everything up to the end of the text
                $rest_length = $is_utf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);
                $str_sub = $slice($pos_start, $rest_length);

                return $str_sub !== false
                    ? $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars)
                    : '';
            }

            $str_sub = $slice($pos_start, $pos_end);

            return $str_sub !== false
                ? $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text
                : '';
        }

        // match is missing or too close to the beginning: extract from the start of the text
        $offset = $length - 1;
        if ($offset > $total_length) {
            $offset = $total_length;
        }

        $pos_end = $next_break($offset);
        if (!$pos_end) {
            return $str;
        }

        $str_sub = $slice(0, $pos_end);

        return $str_sub !== false
            ? \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text
            : '';
    }
1740
1741
    /**
     * Reads an entire file into a string and, optionally, converts the content to UTF-8.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>Name of the file to read.</p>
     * @param bool          $use_include_path [optional] <p>Forwarded to "file_get_contents()",
     *                                        triggers the include path search.</p>
     * @param resource|null $context          [optional] <p>A valid context resource created with
     *                                        stream_context_create. If null and a timeout is set,
     *                                        a context with that HTTP timeout is created.</p>
     * @param int|null      $offset           [optional] <p>The offset where the reading starts,
     *                                        null is handled as 0.</p>
     * @param int|null      $max_length       [optional] <p>Maximum length of data read. The default is
     *                                        to read until end of file is reached.</p>
     * @param int           $timeout          <p>The time in seconds for the timeout, only used when
     *                                        no context was given.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Data that is
     *                                        detected as binary (and is not UTF-16 / UTF-32) will not be
     *                                        converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
        $filename = Bootup::filter_sanitize_string_polyfill($filename);
        if ($filename === false) {
            return false;
        }

        // build a default context (HTTP timeout only) when the caller did not provide one
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $start = $offset ?? 0;

        // the length argument is only passed on when it was really given as integer
        $data = \is_int($max_length)
            ? \file_get_contents($filename, $use_include_path, $context, $start, $max_length)
            : \file_get_contents($filename, $use_include_path, $context, $start);

        // nothing more to do on read errors (false) or if the raw data was requested
        if ($data === false || !$convert_to_utf8) {
            return $data;
        }

        $is_convertible = !self::is_binary($data, true)
                          ||
                          self::is_utf16($data, false) !== false
                          ||
                          self::is_utf32($data, false) !== false;

        if ($is_convertible) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1841
1842
    /**
     * Checks if the content of a file starts with a BOM (Byte Order Mark).
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $raw_content = \file_get_contents($file_path);

        if ($raw_content !== false) {
            // the BOM detection itself is done on the string level
            return self::string_has_bom($raw_content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1865
1866
    /**
     * Normalizes strings to the given Unicode normalization form (default: NFC).
     *
     * Arrays and objects are processed recursively, all other non-string values are returned unchanged.
     * If the normalization of a string fails, the string is passed through self::encode('UTF-8', ...)
     * instead (the original documentation describes this as "converting from WINDOWS-1252 when needed").
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var
     * @param int                 $normalization_form
     * @param string              $leading_combining  <p>Is prepended if the result starts with a combining
     *                                                mark (\p{Mn}), an empty string disables this.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            // filter every element / public property in place
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // pure ASCII strings need no normalization at all
            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // only a truthy marker, so that the check for leading combining chars still runs
                    $normalized = '-';
                } else {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    $var = ($normalized && isset($normalized[0]))
                        ? $normalized
                        : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $has_leading_combining_char = $normalized
                                              &&
                                              $var[0] >= "\x80"
                                              &&
                                              isset($normalized[0], $leading_combining[0])
                                              &&
                                              \preg_match('/^\\p{Mn}/u', $var);

                if ($has_leading_combining_char) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1943
1944
    /**
     * "filter_input()"-wrapper, the result is passed through self::filter() afterwards.
     *
     * Gets a specific external variable by name and optionally filters it.
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW)); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      <b>NULL</b> is handled like "not given".
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $without_options = $options === null || \func_num_args() < 4;

        // the native function is only called with the arguments that were really given
        return self::filter(
            $without_options
                ? \filter_input($type, $variable_name, $filter)
                : \filter_input($type, $variable_name, $filter, $options)
        );
    }
1999
2000
    /**
     * "filter_input_array()"-wrapper, the result is passed through self::filter() afterwards.
     *
     * Gets external variables and optionally filters them.
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_UNSAFE_RAW)); // array('foo' => 'bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int        $type       <p>
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                               <b>INPUT_ENV</b>.
     *                               </p>
     * @param array|null $definition [optional] <p>
     *                               An array defining the arguments. A valid key is a string
     *                               containing a variable name and a valid value is either a filter type, or an array
     *                               optionally specifying the filter, flags and options. If the value is an
     *                               array, valid keys are "filter" which specifies the filter type,
     *                               "flags" which specifies any flags that apply to the
     *                               filter, and "options" which specifies any options that
     *                               apply to the filter.
     *                               </p>
     *                               <p>
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
     *                               input array are filtered by this filter. <b>NULL</b> is handled like "not given".
     *                               </p>
     * @param bool       $add_empty  [optional] <p>
     *                               Add missing keys as <b>NULL</b> to the return value.
     *                               </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $without_definition = $definition === null || \func_num_args() < 2;

        // without a definition the native defaults are used, "$add_empty" is not passed on in that case
        return self::filter(
            $without_definition
                ? \filter_input_array($type)
                : \filter_input_array($type, $definition, $add_empty)
        );
    }
2061
2062
    /**
     * "filter_var()"-wrapper, the result is passed through self::filter() afterwards.
     *
     * Filters a variable with a specified filter.
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        <b>NULL</b> (also if passed explicitly) is handled like "not given".
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                        'options' => array(
     *                                        'default' => 3, // value to return if the filter fails
     *                                        // other options here
     *                                        'min_range' => 0
     *                                        ),
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                        // Expected format: Surname, GivenNames
     *                                        if (strpos($value, ", ") === false) return false;
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
     *                                        $empty = (empty($surname) || empty($givennames));
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                        if ($empty || $notstrings) {
     *                                        return false;
     *                                        } else {
     *                                        return $value;
     *                                        }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            // Never forward NULL as "$options": the native parameter is "array|int", so NULL triggers
            // a deprecation (PHP >= 8.1) or a TypeError (strict types). Same guard as in filter_input().
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2141
2142
    /**
     * "filter_var_array()"-wrapper, the result is passed through self::filter() afterwards.
     *
     * Gets multiple variables and optionally filters them.
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'foo@example.com'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'foo@example.com']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<mixed>   $data       <p>
     *                                   An array with string keys containing the data to filter.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a
     *                                   filter type, or an
     *                                   array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are "filter"
     *                                   which specifies the filter type,
     *                                   "flags" which specifies any flags that apply to the
     *                                   filter, and "options" which specifies any options that
     *                                   apply to the filter. See the example above for a better understanding.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all values
     *                                   in the input array are filtered by this filter.
     *                                   <b>NULL</b> (also if passed explicitly) is handled like "not given".
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   Only passed on together with a definition.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set.
     *               </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition === null || \func_num_args() < 2) {
            // Never forward NULL as definition: the native parameter is "array|int", so NULL triggers
            // a deprecation (PHP >= 8.1) or a TypeError (strict types). Same guard as in filter_input_array().
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        return self::filter($a);
    }
2212
2213
    /**
2214
     * Checks whether finfo is available on the server.
2215
     *
2216
     * @psalm-pure
2217
     *
2218
     * @return bool
2219
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
2220
     *
2221
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
2222
     */
2223
    public static function finfo_loaded(): bool
    {
        // "finfo" counts as available when the class of that name can be resolved.
        $finfo_available = \class_exists('finfo');

        return $finfo_available;
    }
2227
2228
    /**
2229
     * Returns the first $n characters of the string.
2230
     *
2231
     * @param string $str      <p>The input string.</p>
2232
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
2233
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2234
     *
2235
     * @psalm-pure
2236
     *
2237
     * @return string
2238
     */
2239 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to extract from an empty string or for a non-positive count.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // Any other encoding goes through the class' encoding-aware substr().
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, 0, $n, $encoding);
        }

        // UTF-8 takes the direct mbstring call.
        return (string) \mb_substr($str, 0, $n);
    }
2254
2255
    /**
2256
     * Check if the number of Unicode characters isn't greater than the specified integer.
2257
     *
2258
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // true</code>
2259
     *
2260
     * @param string $str      the original string to be checked
2261
     * @param int    $box_size the size in number of chars to be checked against string
2262
     *
2263
     * @psalm-pure
2264
     *
2265
     * @return bool
2266
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
2267
     */
2268 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Character count as reported by the class' own strlen().
        $char_count = (int) self::strlen($str);

        return $box_size >= $char_count;
    }
2272
2273
    /**
2274
     * Try to fix simple broken UTF-8 strings.
2275
     *
2276
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
2277
     *
2278
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
2279
     *
2280
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
2281
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
2282
     * See: http://en.wikipedia.org/wiki/Windows-1252
2283
     *
2284
     * @param string $str <p>The input string</p>
2285
     *
2286
     * @psalm-pure
2287
     *
2288
     * @return string
2289
     */
2290 46
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * Search strings (the broken sequences), cached across calls.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;

        /**
         * Replacement strings, in the same order as the keys cache.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

        if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null || $BROKEN_UTF8_TO_UTF8_VALUES_CACHE === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            // Normalise the table once so that both caches are always arrays
            // of the same length; a missing or unusable table then simply
            // results in "nothing to replace".
            $fix_map = \is_array(self::$BROKEN_UTF8_FIX) ? self::$BROKEN_UTF8_FIX : [];

            $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys($fix_map);
            $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = $fix_map;
        }

        // str_replace() pairs search and replacement entries by position.
        return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
    }
2323
2324
    /**
2325
     * Fix a double (or multiple) encoded UTF8 string.
2326
     *
2327
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
2328
     *
2329
     * @param string|string[] $str you can use a string or an array of strings
2330
     *
2331
     * @psalm-pure
2332
     *
2333
     * @return string|string[]
2334
     *                         <p>Will return the fixed input-"array" or
2335
     *                         the fixed input-"string".</p>
2336
     *
2337
     * @template TFixUtf8
2338
     * @phpstan-param TFixUtf8 $str
2339
     * @phpstan-return TFixUtf8
2340
     */
2341 2
    public static function fix_utf8($str)
    {
        // Arrays are repaired element by element (recursively for nested arrays).
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;

        // An empty string has nothing to decode.
        if ($current === '') {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $current;
        }

        // Decode and re-encode until a pass no longer changes the string.
        do {
            $previous = $current;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($previous, true)
            );
        } while ($previous !== $current);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2372
2373
    /**
2374
     * Get the text direction ('RTL' or 'LTR') of a specific character.
2375
     *
2376
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
2377
     *
2378
     * @param string $char
2379
     *
2380
     * @psalm-pure
2381
     *
2382
     * @return string
2383
     *                <p>'RTL' or 'LTR'.</p>
2384
     */
2385 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            // Numeric direction code as reported by the "IntlChar"-Class.
            $intl_direction = \IntlChar::charDirection($char);

            if (\in_array($intl_direction, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // Any other code falls through to the code point table below.
        }

        $code_point = static::chr_to_decimal($char);

        // Everything outside of this window is treated as left-to-right.
        if ($code_point < 0x5be || $code_point > 0x10b7f) {
            return 'LTR';
        }

        // Right-to-left code points: a plain int is a single code point,
        // a two-element array is an inclusive [first, last] range.
        $rtl_table = [
            // 0x5be .. 0x85e
            0x5be,
            0x5c0,
            0x5c3,
            0x5c6,
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            0x608,
            0x60b,
            0x60d,
            0x61b,
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            0x710,
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            0x7b1,
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            0x7fa,
            [0x800, 0x815],
            0x81a,
            0x824,
            0x828,
            [0x830, 0x83e],
            [0x840, 0x858],
            0x85e,
            // single code point in between
            0x200f,
            // 0xfb1d .. 0x10b7f
            0xfb1d,
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            0xfb3e,
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            0x10808,
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            0x1083c,
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            0x1093f,
            0x10a00,
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            // open-ended in the original chain; the window check above caps it at 0x10b7f
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_table as $entry) {
            if (\is_array($entry)) {
                if ($code_point >= $entry[0] && $code_point <= $entry[1]) {
                    return 'RTL';
                }
            } elseif ($code_point === $entry) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2490
2491
    /**
2492
     * Check for php-support.
2493
     *
2494
     * @param string|null $key
2495
     *
2496
     * @psalm-pure
2497
     *
2498
     * @return mixed
2499
     *               Return the full support-"array", if $key === null<br>
2500
     *               return bool-value, if $key is used and available<br>
2501
     *               otherwise return <strong>null</strong>
2502
     */
2503 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // Load the transliterator list on first use.
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            // Unknown keys yield null.
            return self::$SUPPORT[$key] ?? null;
        }

        // Without a key the whole support array is returned.
        return self::$SUPPORT;
    }
2517
2518
    /**
2519
     * Warning: this method only works for some file-types (png, jpg)
2520
     *          if you need more supported types, please use e.g. "finfo"
2521
     *
2522
     * @param string $str
2523
     * @param array  $fallback <p>with these keys: 'ext', 'mime', 'type'</p>
2524
     *
2525
     * @psalm-pure
2526
     *
2527
     * @return null[]|string[]
2528
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
2529
     *
2530
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
2531
     */
2532 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // At least two bytes are needed to read the signature.
        if (\strlen($str) < 2) {
            return $fallback;
        }

        // The type code is the decimal value of the first byte followed by
        // the decimal value of the second byte, e.g. 0xFF 0xD8 => 255216.
        $type_code = (int) (\ord($str[0]) . \ord($str[1]));

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_types[$type_code] ?? $fallback;
    }
2594
2595
    /**
2596
     * @param int    $length         <p>Length of the random string.</p>
2597
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2598
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2599
     *
2600
     * @return string
2601
     */
2602 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // The UTF-8 case uses the mbstring functions directly; every other
        // encoding is normalized first and goes through the class' wrappers.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $pool_size = $is_utf8
            ? (int) \mb_strlen($possible_chars)
            : (int) self::strlen($possible_chars, $encoding);

        // Without any candidate characters there is nothing to pick from.
        if ($pool_size === 0) {
            return '';
        }

        $last_index = $pool_size - 1;
        $random_string = '';

        //
        // add random chars
        //

        for ($picked = 0; $picked < $length;) {
            try {
                $index = \random_int(0, $last_index);
            } catch (\Exception $e) {
                // fall back to mt_rand() when random_int() fails
                $index = \mt_rand(0, $last_index);
            }

            $char = $is_utf8
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // Only a successfully extracted character counts towards the length.
            if ($char !== false) {
                $random_string .= $char;
                ++$picked;
            }
        }

        return $random_string;
    }
2657
2658
    /**
2659
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
2660
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2661
     *
2662
     * @return string
2663
     */
2664 1
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        $rand_max = \mt_getrandmax();

        try {
            $rand_int = \random_int(0, $rand_max);
        } catch (\Exception $e) {
            // fall back to mt_rand() when random_int() fails
            $rand_int = \mt_rand(0, $rand_max);
        }

        // Random number + session id + remote/server address + caller entropy.
        $unique_helper = \implode(
            '',
            [
                $rand_int,
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $extra_entropy,
            ]
        );

        $unique_string = \uniqid($unique_helper, true);

        if (!$use_md5) {
            return $unique_string;
        }

        return \md5($unique_string . $unique_helper);
    }
2686
2687
    /**
2688
     * Returns true if the string contains a lower case char, false otherwise.
2689
     *
2690
     * @param string $str <p>The input string.</p>
2691
     *
2692
     * @psalm-pure
2693
     *
2694
     * @return bool
2695
     *              <p>Whether or not the string contains a lower case character.</p>
2696
     */
2697 47
    public static function has_lowercase(string $str): bool
    {
        // Anything followed by at least one lower case character.
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2705
2706
    /**
2707
     * Returns true if the string contains whitespace, false otherwise.
2708
     *
2709
     * @param string $str <p>The input string.</p>
2710
     *
2711
     * @psalm-pure
2712
     *
2713
     * @return bool
2714
     *              <p>Whether or not the string contains whitespace.</p>
2715
     */
2716 11
    public static function has_whitespace(string $str): bool
    {
        // Anything followed by at least one whitespace character.
        $pattern = '.*[[:space:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2724
2725
    /**
2726
     * Returns true if the string contains an upper case char, false otherwise.
2727
     *
2728
     * @param string $str <p>The input string.</p>
2729
     *
2730
     * @psalm-pure
2731
     *
2732
     * @return bool
2733
     *              <p>Whether or not the string contains an upper case character.</p>
2734
     */
2735 12
    public static function has_uppercase(string $str): bool
    {
        // Anything followed by at least one upper case character.
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2743
2744
    /**
2745
     * Converts a hexadecimal value into a UTF-8 character.
2746
     *
2747
     * INFO: opposite to UTF8::chr_to_hex()
2748
     *
2749
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
2750
     *
2751
     * @param string $hexdec <p>The hexadecimal value.</p>
2752
     *
2753
     * @psalm-pure
2754
     *
2755
     * @return false|string one single UTF-8 character
2756
     */
2757 4
    public static function hex_to_chr(string $hexdec)
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        // The numeric value is turned into a character by decimal_to_chr().
        return self::decimal_to_chr($code_point);
    }
2762
2763
    /**
2764
     * Converts hexadecimal U+xxxx code point representation to integer.
2765
     *
2766
     * INFO: opposite to UTF8::int_to_hex()
2767
     *
2768
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
2769
     *
2770
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
2771
     *
2772
     * @psalm-pure
2773
     *
2774
     * @return false|int
2775
     *                   <p>The code point, or false on failure.</p>
2776
     */
2777 2
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal digits.
        // Only real hex digits are accepted: letters outside of a-f used to
        // pass the pattern and were then silently converted to 0 by intval().
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2792
2793
    /**
2794
     * Converts a UTF-8 string to a series of HTML numbered entities.
2795
     *
2796
     * INFO: opposite to UTF8::html_decode()
2797
     *
2798
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
2799
     *
2800
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
2801
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
2802
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
2803
     *
2804
     * @psalm-pure
2805
     *
2806
     * @return string HTML numbered entities
2807
     */
2808 14
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // convmap = [first code point, last code point, offset, mask].
            // The range runs up to U+10FFFF (the last Unicode code point), so
            // that code points above U+FFFFF are encoded as well; the mask is
            // wide enough to keep every bit of such a code point.
            $conv_map = [
                $keep_ascii_chars ? 0x80 : 0x00,
                0x10ffff,
                0,
                0x1fffff,
            ];

            if ($encoding === 'UTF-8') {
                // First attempt without an explicit encoding argument.
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity($str, $conv_map);
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $conv_map, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2865
2866
    /**
2867
     * UTF-8 version of html_entity_decode()
2868
     *
2869
     * The reason we are not using html_entity_decode() by itself is because
2870
     * while it is not technically correct to leave out the semicolon
2871
     * at the end of an entity most browsers will still interpret the entity
2872
     * correctly. html_entity_decode() does not convert entities without
2873
     * semicolons, so we are left with our own little solution here. Bummer.
2874
     *
2875
     * Convert all HTML entities to their applicable characters.
2876
     *
2877
     * INFO: opposite to UTF8::html_encode()
2878
     *
2879
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2880
     *
2881
     * @see http://php.net/manual/en/function.html-entity-decode.php
2882
     *
2883
     * @param string   $str      <p>
2884
     *                           The input string.
2885
     *                           </p>
2886
     * @param int|null $flags    [optional] <p>
2887
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2888
     *                           and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2889
     *                           <table>
2890
     *                           Available <i>flags</i> constants
2891
     *                           <tr valign="top">
2892
     *                           <td>Constant Name</td>
2893
     *                           <td>Description</td>
2894
     *                           </tr>
2895
     *                           <tr valign="top">
2896
     *                           <td><b>ENT_COMPAT</b></td>
2897
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2898
     *                           </tr>
2899
     *                           <tr valign="top">
2900
     *                           <td><b>ENT_QUOTES</b></td>
2901
     *                           <td>Will convert both double and single quotes.</td>
2902
     *                           </tr>
2903
     *                           <tr valign="top">
2904
     *                           <td><b>ENT_NOQUOTES</b></td>
2905
     *                           <td>Will leave both double and single quotes unconverted.</td>
2906
     *                           </tr>
2907
     *                           <tr valign="top">
2908
     *                           <td><b>ENT_HTML401</b></td>
2909
     *                           <td>
2910
     *                           Handle code as HTML 4.01.
2911
     *                           </td>
2912
     *                           </tr>
2913
     *                           <tr valign="top">
2914
     *                           <td><b>ENT_XML1</b></td>
2915
     *                           <td>
2916
     *                           Handle code as XML 1.
2917
     *                           </td>
2918
     *                           </tr>
2919
     *                           <tr valign="top">
2920
     *                           <td><b>ENT_XHTML</b></td>
2921
     *                           <td>
2922
     *                           Handle code as XHTML.
2923
     *                           </td>
2924
     *                           </tr>
2925
     *                           <tr valign="top">
2926
     *                           <td><b>ENT_HTML5</b></td>
2927
     *                           <td>
2928
     *                           Handle code as HTML 5.
2929
     *                           </td>
2930
     *                           </tr>
2931
     *                           </table>
2932
     *                           </p>
2933
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2934
     *
2935
     * @psalm-pure
2936
     *
2937
     * @return string the decoded string
2938
     */
2939 34
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Strings shorter than four bytes are returned unchanged
        // (examples: "&;" or "&x;").
        if (!isset($str[3])) {
            return $str;
        }

        // no "&"
        if (\strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Decode repeatedly until a pass no longer changes the string, so
        // that nested entities are resolved as well.
        for (;;) {
            // Without any "&" left, a pass cannot change anything.
            if (\strpos($str, '&') === false) {
                break;
            }

            $before_pass = $str;

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities
                // (append the missing ";" so that html_entity_decode() accepts them)
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);

            if ($before_pass === $str) {
                break;
            }
        }

        return $str;
    }
2998
2999
    /**
3000
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
3001
     *
3002
     * @param string $str
3003
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3004
     *
3005
     * @psalm-pure
3006
     *
3007
     * @return string
3008
     */
3009 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Both quote styles are passed along with ENT_SUBSTITUTE.
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3017
3018
    /**
3019
     * Remove empty html-tag.
3020
     *
3021
     * e.g.: <pre><tag></tag></pre>
3022
     *
3023
     * @param string $str
3024
     *
3025
     * @psalm-pure
3026
     *
3027
     * @return string
3028
     */
3029 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // "<...>" without a slash inside, optional whitespace, then any "</...>".
        $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($empty_tag_pattern, '', $str);

        // preg_replace() yields null on a PCRE error; the cast turns that into ''.
        return (string) $stripped;
    }
3037
3038
    /**
3039
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3040
     *
3041
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3042
     *
3043
     * @see http://php.net/manual/en/function.htmlentities.php
3044
     *
3045
     * @param string $str           <p>
3046
     *                              The input string.
3047
     *                              </p>
3048
     * @param int    $flags         [optional] <p>
3049
     *                              A bitmask of one or more of the following flags, which specify how to handle
3050
     *                              quotes, invalid code unit sequences and the used document type. The default is
3051
     *                              ENT_COMPAT | ENT_HTML401.
3052
     *                              <table>
3053
     *                              Available <i>flags</i> constants
3054
     *                              <tr valign="top">
3055
     *                              <td>Constant Name</td>
3056
     *                              <td>Description</td>
3057
     *                              </tr>
3058
     *                              <tr valign="top">
3059
     *                              <td><b>ENT_COMPAT</b></td>
3060
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3061
     *                              </tr>
3062
     *                              <tr valign="top">
3063
     *                              <td><b>ENT_QUOTES</b></td>
3064
     *                              <td>Will convert both double and single quotes.</td>
3065
     *                              </tr>
3066
     *                              <tr valign="top">
3067
     *                              <td><b>ENT_NOQUOTES</b></td>
3068
     *                              <td>Will leave both double and single quotes unconverted.</td>
3069
     *                              </tr>
3070
     *                              <tr valign="top">
3071
     *                              <td><b>ENT_IGNORE</b></td>
3072
     *                              <td>
3073
     *                              Silently discard invalid code unit sequences instead of returning
3074
     *                              an empty string. Using this flag is discouraged as it
3075
     *                              may have security implications.
3076
     *                              </td>
3077
     *                              </tr>
3078
     *                              <tr valign="top">
3079
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3080
     *                              <td>
3081
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3082
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3083
     *                              string.
3084
     *                              </td>
3085
     *                              </tr>
3086
     *                              <tr valign="top">
3087
     *                              <td><b>ENT_DISALLOWED</b></td>
3088
     *                              <td>
3089
     *                              Replace invalid code points for the given document type with a
3090
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3091
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3092
     *                              instance, to ensure the well-formedness of XML documents with
3093
     *                              embedded external content.
3094
     *                              </td>
3095
     *                              </tr>
3096
     *                              <tr valign="top">
3097
     *                              <td><b>ENT_HTML401</b></td>
3098
     *                              <td>
3099
     *                              Handle code as HTML 4.01.
3100
     *                              </td>
3101
     *                              </tr>
3102
     *                              <tr valign="top">
3103
     *                              <td><b>ENT_XML1</b></td>
3104
     *                              <td>
3105
     *                              Handle code as XML 1.
3106
     *                              </td>
3107
     *                              </tr>
3108
     *                              <tr valign="top">
3109
     *                              <td><b>ENT_XHTML</b></td>
3110
     *                              <td>
3111
     *                              Handle code as XHTML.
3112
     *                              </td>
3113
     *                              </tr>
3114
     *                              <tr valign="top">
3115
     *                              <td><b>ENT_HTML5</b></td>
3116
     *                              <td>
3117
     *                              Handle code as HTML 5.
3118
     *                              </td>
3119
     *                              </tr>
3120
     *                              </table>
3121
     *                              </p>
3122
     * @param string $encoding      [optional] <p>
3123
     *                              Like <b>htmlspecialchars</b>,
3124
     *                              <b>htmlentities</b> takes an optional third argument
3125
     *                              <i>encoding</i> which defines encoding used in
3126
     *                              conversion.
3127
     *                              Although this argument is technically optional, you are highly
3128
     *                              encouraged to specify the correct value for your code.
3129
     *                              </p>
3130
     * @param bool   $double_encode [optional] <p>
3131
     *                              When <i>double_encode</i> is turned off PHP will not
3132
     *                              encode existing html entities. The default is to convert everything.
3133
     *                              </p>
3134
     *
3135
     * @psalm-pure
3136
     *
3137
     * @return string
3138
     *                <p>
3139
     *                The encoded string.
3140
     *                <br><br>
3141
     *                If the input <i>string</i> contains an invalid code unit
3142
     *                sequence within the given <i>encoding</i> an empty string
3143
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3144
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3145
     *                </p>
3146
     */
3147 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; any other name goes through
        // normalize_encoding() with "UTF-8" as its second argument.
        $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        // PHP does not turn a backslash into its html entity, since a backslash
        // is mostly used for escaping when inserting into a database. That is
        // not needed with a decent database layer, so backslashes are replaced
        // by their html entity equivalent here.
        //
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // NOTE(review): html_encode() is defined elsewhere in this class; the
        // meaning of its second argument (true) should be confirmed there.
        return self::html_encode($encoded, true, $encoding);
    }
3176
3177
    /**
3178
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3179
     *
3180
     * INFO: Take a look at "UTF8::htmlentities()"
3181
     *
3182
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3183
     *
3184
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3185
     *
3186
     * @param string $str           <p>
3187
     *                              The string being converted.
3188
     *                              </p>
3189
     * @param int    $flags         [optional] <p>
3190
     *                              A bitmask of one or more of the following flags, which specify how to handle
3191
     *                              quotes, invalid code unit sequences and the used document type. The default is
3192
     *                              ENT_COMPAT | ENT_HTML401.
3193
     *                              <table>
3194
     *                              Available <i>flags</i> constants
3195
     *                              <tr valign="top">
3196
     *                              <td>Constant Name</td>
3197
     *                              <td>Description</td>
3198
     *                              </tr>
3199
     *                              <tr valign="top">
3200
     *                              <td><b>ENT_COMPAT</b></td>
3201
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3202
     *                              </tr>
3203
     *                              <tr valign="top">
3204
     *                              <td><b>ENT_QUOTES</b></td>
3205
     *                              <td>Will convert both double and single quotes.</td>
3206
     *                              </tr>
3207
     *                              <tr valign="top">
3208
     *                              <td><b>ENT_NOQUOTES</b></td>
3209
     *                              <td>Will leave both double and single quotes unconverted.</td>
3210
     *                              </tr>
3211
     *                              <tr valign="top">
3212
     *                              <td><b>ENT_IGNORE</b></td>
3213
     *                              <td>
3214
     *                              Silently discard invalid code unit sequences instead of returning
3215
     *                              an empty string. Using this flag is discouraged as it
3216
     *                              may have security implications.
3217
     *                              </td>
3218
     *                              </tr>
3219
     *                              <tr valign="top">
3220
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3221
     *                              <td>
3222
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3223
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3224
     *                              string.
3225
     *                              </td>
3226
     *                              </tr>
3227
     *                              <tr valign="top">
3228
     *                              <td><b>ENT_DISALLOWED</b></td>
3229
     *                              <td>
3230
     *                              Replace invalid code points for the given document type with a
3231
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3232
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3233
     *                              instance, to ensure the well-formedness of XML documents with
3234
     *                              embedded external content.
3235
     *                              </td>
3236
     *                              </tr>
3237
     *                              <tr valign="top">
3238
     *                              <td><b>ENT_HTML401</b></td>
3239
     *                              <td>
3240
     *                              Handle code as HTML 4.01.
3241
     *                              </td>
3242
     *                              </tr>
3243
     *                              <tr valign="top">
3244
     *                              <td><b>ENT_XML1</b></td>
3245
     *                              <td>
3246
     *                              Handle code as XML 1.
3247
     *                              </td>
3248
     *                              </tr>
3249
     *                              <tr valign="top">
3250
     *                              <td><b>ENT_XHTML</b></td>
3251
     *                              <td>
3252
     *                              Handle code as XHTML.
3253
     *                              </td>
3254
     *                              </tr>
3255
     *                              <tr valign="top">
3256
     *                              <td><b>ENT_HTML5</b></td>
3257
     *                              <td>
3258
     *                              Handle code as HTML 5.
3259
     *                              </td>
3260
     *                              </tr>
3261
     *                              </table>
3262
     *                              </p>
3263
     * @param string $encoding      [optional] <p>
3264
     *                              Defines encoding used in conversion.
3265
     *                              </p>
3266
     *                              <p>
3267
     *                              For the purposes of this function, the encodings
3268
     *                              ISO-8859-1, ISO-8859-15,
3269
     *                              UTF-8, cp866,
3270
     *                              cp1251, cp1252, and
3271
     *                              KOI8-R are effectively equivalent, provided the
3272
     *                              <i>string</i> itself is valid for the encoding, as
3273
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3274
     *                              the same positions in all of these encodings.
3275
     *                              </p>
3276
     * @param bool   $double_encode [optional] <p>
3277
     *                              When <i>double_encode</i> is turned off PHP will not
3278
     *                              encode existing html entities, the default is to convert everything.
3279
     *                              </p>
3280
     *
3281
     * @psalm-pure
3282
     *
3283
     * @return string the converted string.
3284
     *                </p>
3285
     *                <p>
3286
     *                If the input <i>string</i> contains an invalid code unit
3287
     *                sequence within the given <i>encoding</i> an empty string
3288
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3289
     *                <b>ENT_SUBSTITUTE</b> flags are set
3290
     */
3291 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; any other name goes through
        // normalize_encoding() with "UTF-8" as its second argument.
        $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $escaped = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $escaped;
    }
3308
3309
    /**
3310
     * Checks whether iconv is available on the server.
3311
     *
3312
     * @psalm-pure
3313
     *
3314
     * @return bool
3315
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3316
     *
3317
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3318
     */
3319
    public static function iconv_loaded(): bool
    {
        // Reports whether the "iconv" extension is loaded in this PHP runtime.
        $is_available = \extension_loaded('iconv');

        return $is_available;
    }
3323
3324
    /**
3325
     * Converts Integer to hexadecimal U+xxxx code point representation.
3326
     *
3327
     * INFO: opposite to UTF8::hex_to_int()
3328
     *
3329
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3330
     *
3331
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3332
     * @param string $prefix [optional]
3333
     *
3334
     * @psalm-pure
3335
     *
3336
     * @return string the code point, or empty string on failure
3337
     */
3338 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Left-pad with zeros up to four hex digits; longer values are kept as they are.
        $hex_digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $hex_digits;
    }
3346
3347
    /**
3348
     * Checks whether intl-char is available on the server.
3349
     *
3350
     * @psalm-pure
3351
     *
3352
     * @return bool
3353
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3354
     *
3355
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3356
     */
3357
    public static function intlChar_loaded(): bool
    {
        // Availability is detected by the presence of the "IntlChar" class.
        $is_available = \class_exists('IntlChar');

        return $is_available;
    }
3361
3362
    /**
3363
     * Checks whether intl is available on the server.
3364
     *
3365
     * @psalm-pure
3366
     *
3367
     * @return bool
3368
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3369
     *
3370
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3371
     */
3372 5
    public static function intl_loaded(): bool
    {
        // Reports whether the "intl" extension is loaded in this PHP runtime.
        $is_available = \extension_loaded('intl');

        return $is_available;
    }
3376
3377
    /**
3378
     * Returns true if the string contains only alphabetic chars, false otherwise.
3379
     *
3380
     * @param string $str <p>The input string.</p>
3381
     *
3382
     * @psalm-pure
3383
     *
3384
     * @return bool
3385
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3386
     */
3387 10
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring, use the class's own pattern matcher instead.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3395
3396
    /**
3397
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3398
     *
3399
     * @param string $str <p>The input string.</p>
3400
     *
3401
     * @psalm-pure
3402
     *
3403
     * @return bool
3404
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3405
     */
3406 13
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring, use the class's own pattern matcher instead.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3414
3415
    /**
3416
     * Returns true if the string contains only punctuation chars, false otherwise.
3417
     *
3418
     * @param string $str <p>The input string.</p>
3419
     *
3420
     * @psalm-pure
3421
     *
3422
     * @return bool
3423
     *              <p>Whether or not $str contains only punctuation chars.</p>
3424
     */
3425 10
    public static function is_punctuation(string $str): bool
    {
        // Zero or more punctuation characters and nothing else.
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3429
3430
    /**
3431
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3432
     *
3433
     * @param string $str                       <p>The input string.</p>
3434
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
3435
     *
3436
     * @psalm-pure
3437
     *
3438
     * @return bool
3439
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3440
     */
3441 1
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // The string is printable when removing invisible characters leaves it unchanged.
        $cleaned = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $cleaned === $str;
    }
3445
3446
    /**
3447
     * Checks if a string is 7 bit ASCII.
3448
     *
3449
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3450
     *
3451
     * @param string $str <p>The string to check.</p>
3452
     *
3453
     * @psalm-pure
3454
     *
3455
     * @return bool
3456
     *              <p>
3457
     *              <strong>true</strong> if it is ASCII<br>
3458
     *              <strong>false</strong> otherwise
3459
     *              </p>
3460
     */
3461 8
    public static function is_ascii(string $str): bool
    {
        // The check itself is delegated to the ASCII helper class.
        $is_seven_bit = ASCII::is_ascii($str);

        return $is_seven_bit;
    }
3465
3466
    /**
3467
     * Returns true if the string is base64 encoded, false otherwise.
3468
     *
3469
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3470
     *
3471
     * @param string|null $str                   <p>The input string.</p>
3472
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3473
     *
3474
     * @psalm-pure
3475
     *
3476
     * @return bool
3477
     *              <p>Whether or not $str is base64 encoded.</p>
3478
     */
3479 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // An empty string only counts as base64 when the caller opted in.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // null (or any other non-string input) is never base64.
        if (\is_string($str) === false) {
            return false;
        }

        // Strict mode makes base64_decode() fail on characters outside the alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Re-encoding must reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3497
3498
    /**
3499
     * Check if the input is binary... (this looks like a hack).
3500
     *
3501
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3502
     *
3503
     * @param int|string $input
3504
     * @param bool       $strict
3505
     *
3506
     * @psalm-pure
3507
     *
3508
     * @return bool
3509
     */
3510 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // A string made up only of "0" and "1" is treated as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        if (!$strict) {
            // Non-strict heuristic: binary when more than a quarter of the bytes are NUL.
            $byte_count = \strlen($input);
            $null_count = \substr_count($input, "\x0", 0, $byte_count);

            return ($null_count / $byte_count) > 0.25;
        }

        // Strict mode relies on ext-fileinfo.
        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $detected_encoding === 'binary';
    }
3550
3551
    /**
3552
     * Check if the file is binary.
3553
     *
3554
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3555
     *
3556
     * @param string $file
3557
     *
3558
     * @return bool
3559
     */
3560 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // The file could not be opened, so there is nothing to inspect.
            return false;
        }

        // Only the first 512 bytes are sampled.
        $sample = \fread($handle, 512);
        \fclose($handle);

        if ($sample === false || $sample === '') {
            return false;
        }

        // The sample is checked in strict mode.
        return self::is_binary($sample, true);
    }
3577
3578
    /**
3579
     * Returns true if the string contains only whitespace chars, false otherwise.
3580
     *
3581
     * @param string $str <p>The input string.</p>
3582
     *
3583
     * @psalm-pure
3584
     *
3585
     * @return bool
3586
     *              <p>Whether or not $str contains only whitespace characters.</p>
3587
     */
3588 15
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // Without mbstring, use the class's own pattern matcher instead.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3596
3597
    /**
3598
     * Checks if the given string is equal to any "Byte Order Mark".
3599
     *
3600
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3601
     *
3602
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3603
     *
3604
     * @param string $str <p>The input string.</p>
3605
     *
3606
     * @psalm-pure
3607
     *
3608
     * @return bool
3609
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3610
     */
3611 2
    public static function is_bom($str): bool
    {
        // The BOM byte sequences are the keys of self::$BOM; the values are not
        // needed here. Iterate over the keys by value: binding the elements of
        // the static array by reference is unnecessary and leaves a reference
        // that is never unset.
        foreach (\array_keys(self::$BOM) as $bom_string) {
            // Strict comparison: only an exact, type-identical match counts.
            if ($str === $bom_string) {
                return true;
            }
        }

        return false;
    }
3622
3623
    /**
3624
     * Determine whether the string is considered to be empty.
3625
     *
3626
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3627
     * empty() does not generate a warning if the variable does not exist.
3628
     *
3629
     * @param array|float|int|string $str
3630
     *
3631
     * @psalm-pure
3632
     *
3633
     * @return bool
3634
     *              <p>Whether or not $str is empty().</p>
3635
     */
3636 1
    public static function is_empty($str): bool
    {
        // Thin wrapper around PHP's empty(): '', '0', 0, 0.0, [], null and false are all empty.
        $is_empty = empty($str);

        return $is_empty;
    }
3640
3641
    /**
3642
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3643
     *
3644
     * @param string $str <p>The input string.</p>
3645
     *
3646
     * @psalm-pure
3647
     *
3648
     * @return bool
3649
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3650
     */
3651 13
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // Without mbstring, use the class's own pattern matcher instead.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3659
3660
    /**
3661
     * Check if the string contains any HTML tags.
3662
     *
3663
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3664
     *
3665
     * @param string $str <p>The input string.</p>
3666
     *
3667
     * @psalm-pure
3668
     *
3669
     * @return bool
3670
     *              <p>Whether or not $str contains html elements.</p>
3671
     */
3672 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support: the input is emoji-encoded before matching
        $encoded = self::emoji_encode($str);

        // An opening or closing tag, optionally with attributes and a self-closing slash.
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        // preg_match() returns 1 only when a tag was found (0 = none, false = error).
        return \preg_match($tag_pattern, $encoded) === 1;
    }
3687
3688
    /**
3689
     * Check if $url is a correct url.
3690
     *
3691
     * @param string $url
3692
     * @param bool   $disallow_localhost
3693
     *
3694
     * @psalm-pure
3695
     *
3696
     * @return bool
3697
     */
3698 1
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        // Only http:// and https:// URLs are ever accepted.
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // In a URL an IPv6 literal is written in square brackets, so the
                    // loopback address must be matched in its bracketed form as well.
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            // Reject anything that contains ".localhost" after the scheme.
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Everything else is left to PHP's own URL validator.
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3739
3740
    /**
3741
     * Try to check if "$str" is a JSON-string.
3742
     *
3743
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
3744
     *
3745
     * @param string $str                                    <p>The input string.</p>
3746
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
3747
     *                                                       results.</p>
3748
     *
3749
     * @return bool
3750
     *              <p>Whether or not the $str is in JSON format.</p>
3751
     */
3752 42
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only tolerated when the input itself spells "null" (in any case).
        $input_spells_null = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$input_spells_null) {
            return false;
        }

        if ($only_array_or_object_results_are_valid) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        // The final verdict is the JSON error state after decoding.
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3779
3780
    /**
     * Checks the string against the "^[[:lower:]]*$" pattern, i.e. whether it consists of lowercase chars only.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $lowercase_only_pattern = '^[[:lower:]]*$';

        // use mbstring's matcher when it is flagged as supported, otherwise the internal pattern helper
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($lowercase_only_pattern, $str)
            : self::str_matches_pattern($str, $lowercase_only_pattern);
    }
3796
3797
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * INFO: The string is probed via "unserialize()" with "allowed_classes" set to false, so no
     *       user-land class is instantiated while checking (objects are restored as
     *       "__PHP_Incomplete_Class" only).
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is a serialized "false": unserialize() returns false for it,
        // which could not be told apart from a failure in the check below
        if ($str === 'b:0;') {
            return true;
        }

        // the result is only inspected, never used, so forbid the creation of any object
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3819
3820
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise (the string is checked against the "^[[:upper:]]*$" pattern).
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $uppercase_only_pattern = '^[[:upper:]]*$';

        // use mbstring's matcher when it is flagged as supported, otherwise the internal pattern helper
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($uppercase_only_pattern, $str)
            : self::str_matches_pattern($str, $uppercase_only_pattern);
    }
3839
3840
    /**
     * Check if the string is UTF-16.
     *
     * The string is converted from UTF-16LE and from UTF-16BE to UTF-8 and back again; for every
     * byte order whose round trip is stable, the resulting chars are compared with the chars of the
     * input. The byte order with more hits wins, a tie means "not UTF-16".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>When true, a string that fails the binary-check is
     *                                          rejected right away (the check is skipped if the string has a BOM).</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check: it is skipped for strings that carry a BOM
        if (self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if ($check_if_string_is_binary && !self::is_binary($str, true)) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of matching chars per candidate byte order
        $maybe_utf16 = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $as_utf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$as_utf8) {
                continue;
            }

            // only a conversion that survives the round trip is taken into account
            $back_to_utf16 = \mb_convert_encoding($as_utf8, $encoding, 'UTF-8');
            $round_trip = \mb_convert_encoding($back_to_utf16, 'UTF-8', $encoding);
            if ($round_trip !== $as_utf8) {
                continue;
            }

            // the chars of the input are collected lazily and shared between both byte orders
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // iterate by value: only the keys (the chars) are of interest here
            foreach (self::count_chars($round_trip) as $char => $unused_count) {
                if (\in_array($char, $str_chars, true)) {
                    ++$maybe_utf16[$encoding];
                }
            }
        }

        if ($maybe_utf16['UTF-16LE'] === $maybe_utf16['UTF-16BE']) {
            return false;
        }

        return $maybe_utf16['UTF-16LE'] > $maybe_utf16['UTF-16BE'] ? 1 : 2;
    }
3938
3939
    /**
     * Check if the string is UTF-32.
     *
     * The string is converted from UTF-32LE and from UTF-32BE to UTF-8 and back again; for every
     * byte order whose round trip is stable, the resulting chars are compared with the chars of the
     * input. The byte order with more hits wins, a tie means "not UTF-32".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>When true, a string that fails the binary-check is
     *                                          rejected right away.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if ($check_if_string_is_binary && !self::is_binary($str, true)) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of matching chars per candidate byte order
        $maybe_utf32 = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $as_utf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$as_utf8) {
                continue;
            }

            // only a conversion that survives the round trip is taken into account
            $back_to_utf32 = \mb_convert_encoding($as_utf8, $encoding, 'UTF-8');
            $round_trip = \mb_convert_encoding($back_to_utf32, 'UTF-8', $encoding);
            if ($round_trip !== $as_utf8) {
                continue;
            }

            // the chars of the input are collected lazily and shared between both byte orders
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // iterate by value: only the keys (the chars) are of interest here
            foreach (self::count_chars($round_trip) as $char => $unused_count) {
                if (\in_array($char, $str_chars, true)) {
                    ++$maybe_utf32[$encoding];
                }
            }
        }

        if ($maybe_utf32['UTF-32LE'] === $maybe_utf32['UTF-32BE']) {
            return false;
        }

        return $maybe_utf32['UTF-32LE'] > $maybe_utf32['UTF-32BE'] ? 1 : 2;
    }
4032
4033
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * An array is checked element by element (recursively); the first invalid element ends the check.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (!\is_array($str)) {
            // scalars and null are cast to a string before the actual check
            return self::is_utf8_string((string) $str, $strict);
        }

        // the elements are only read, so they are iterated by value
        foreach ($str as $value) {
            if (!self::is_utf8($value, $strict)) {
                return false;
            }
        }

        return true;
    }
4063
4064
    /**
     * Decodes a JSON string.
     *
     * The input is passed through "UTF8::filter()" first and is then handed over to the
     * native "json_decode()" together with the remaining arguments.
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, forwarded unchanged to the native function.
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the "json" support-flag is false
     *
     * @return mixed
     *               <p>The result of the native "json_decode()" call: the decoded value in the appropriate PHP
     *               type, <b>NULL</b> if the <i>json</i> cannot be decoded or if the encoded data is deeper than
     *               the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = \json_decode($filtered_json, $assoc, $depth, $options);

        return $decoded;
    }
4117
4118
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through "UTF8::filter()" first and is then handed over to the
     * native "json_encode()" together with the remaining arguments.
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded.</p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_UNESCAPED_UNICODE</b>), forwarded unchanged
     *                       to the native function.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the "json" support-flag is false
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $encoded = \json_encode($filtered_value, $options, $depth);

        return $encoded;
    }
4170
4171
    /**
     * Checks whether JSON is available on the server, i.e. whether the "json_decode" function exists.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $json_decode_exists = \function_exists('json_decode');

        return $json_decode_exists;
    }
4185
4186
    /**
     * Makes string's first char lowercase.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            // without language / length special cases the plain mbstring function is sufficient
            if ($lang === null && !$try_to_keep_the_string_length) {
                return \mb_strtolower($first_char) . $remainder;
            }

            $lowered_first_char = self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $lowered_first_char . $remainder;
        }

        // every other encoding goes through the encoding-aware helpers of this class
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $first_char = (string) self::substr($str, 0, 1, $encoding);
        $remainder = (string) self::substr($str, 1, null, $encoding);

        $lowered_first_char = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $lowered_first_char . $remainder;
    }
4248
4249
    /**
     * Lowercase the first char of all words in the string.
     *
     * The string is split via "UTF8::str_to_words()"; every non-empty part that is not listed in
     * "$exceptions" is passed through "UTF8::lcfirst()", and the parts are concatenated again.
     *
     * INFO: Only a really empty string ("") is treated as empty, so the string "0" and "0"-words are kept.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // strict comparison: a loose "!$str" would also discard the string "0"
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // strict comparison: a loose "!$word" would also drop the word "0" from the result
            if ($word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                // excluded words are taken over unchanged
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }
4302
4303
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null strips whitespace, an empty string
     *                           strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // an empty char-list would produce the invalid character class "^[]+", there is nothing to strip
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars === null) {
            $pattern = '^[\\s]+';
        } else {
            // the delimiter "/" is additionally quoted for the non-mbstring regex helper only
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            $pattern = '^[' . $quoted_chars . ']+';
        }

        if ($use_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
4342
4343
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array is joined into one string before its code points are compared.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $all_codepoints = self::codepoints($input);
        if ($all_codepoints === []) {
            return null;
        }

        $highest_codepoint = (int) \max($all_codepoints);

        return self::chr($highest_codepoint);
    }
4369
4370
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte lengths of the given chars, 0 if no char sizes are available.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $char_sizes = self::chr_size_list($str);

        // an empty size list has no maximum
        return $char_sizes === [] ? 0 : (int) \max($char_sizes);
    }
4392
4393
    /**
     * Checks whether mbstring is available on the server, i.e. whether the "mbstring" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $mbstring_is_loaded = \extension_loaded('mbstring');

        return $mbstring_is_loaded;
    }
4407
4408
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array is joined into one string before its code points are compared.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
     */
    public static function min($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $all_codepoints = self::codepoints($input);
        if ($all_codepoints === []) {
            return null;
        }

        $lowest_codepoint = (int) \min($all_codepoints);

        return self::chr($lowest_codepoint);
    }
4435
4436
    /**
4437
     * Normalize the encoding-"name" input.
4438
     *
4439
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
4440
     *
4441
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
4442
     * @param mixed $fallback <p>e.g.: UTF-8</p>
4443
     *
4444
     * @psalm-pure
4445
     *
4446
     * @return mixed|string
4447
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)</p>
4448
     *
4449
     * @template TNormalizeEncodingFallback
4450
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
4451
     * @phpstan-return string|TNormalizeEncodingFallback
4452
     */
4453 339
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * Memo of already normalized names (given name => normalized name).
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // empty input (this also covers the string "0") -> hand back the fallback
        if (!$encoding) {
            return $fallback;
        }

        // fast path: exact matches for the most common spellings
        $frequent_names = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($frequent_names[$encoding])) {
            return $frequent_names[$encoding];
        }

        // only a fallback, for non "strict_types" usage ...
        if ($encoding === '1' || $encoding === '0') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // the list of known encodings is loaded lazily on first use
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // already a known name -> remember it and return it untouched
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // Look-up key: upper-cased name reduced to [A-Z0-9], e.g. "iso-8859-15" => "ISO885915".
        $encoding_upper = \strtoupper($encoding);
        $lookup_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding_upper);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown names are returned upper-cased, known aliases in their canonical spelling
        $normalized = $equivalences[$lookup_key] ?? $encoding_upper;

        // the memo is keyed by the name exactly as it was passed in
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4601
4602
    /**
4603
     * Standardize line ending to unix-like.
4604
     *
4605
     * @param string          $str      <p>The input string.</p>
4606
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
4607
     *                                  here.</p>
4608
     *
4609
     * @psalm-pure
4610
     *
4611
     * @return string
4612
     *                <p>A string with normalized line ending.</p>
4613
     */
4614 4
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if (\is_array($replacer)) {
            // Array replacers keep PHP's element-wise "str_replace" behavior
            // ("\r\n" => 1st element, "\r" => 2nd element, "\n" => 3rd element).
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // "strtr" scans the input exactly once and prefers the longest key, so "\r\n" counts
        // as ONE line ending and text that was just inserted is never replaced again.
        // A sequential "str_replace" turns "a\r\nb" into "a\r\r\n\r\nb" when the replacer
        // itself is "\r\n" (the documented Windows use case).
        return \strtr(
            $str,
            [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ]
        );
    }
4618
4619
    /**
4620
     * Normalize some MS Word special characters.
4621
     *
4622
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
4623
     *
4624
     * @param string $str <p>The string to be normalized.</p>
4625
     *
4626
     * @psalm-pure
4627
     *
4628
     * @return string
4629
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
4630
     */
4631 10
    public static function normalize_msword(string $str): string
    {
        // thin facade: the work is delegated to the "ASCII" class
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4635
4636
    /**
4637
     * Normalize the whitespace.
4638
     *
4639
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
4640
     *
4641
     * @param string $str                          <p>The string to be normalized.</p>
4642
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
4643
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
4644
     *                                             bidirectional text chars.</p>
4645
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
4646
     *
4647
     * @psalm-pure
4648
     *
4649
     * @return string
4650
     *                <p>A string with normalized whitespace.</p>
4651
     */
4652 61
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        // pure delegation: the string and all three flags are forwarded unchanged
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls, $normalize_control_characters);

        return $normalized;
    }
4665
4666
    /**
4667
     * Calculates Unicode code point of the given UTF-8 encoded character.
4668
     *
4669
     * INFO: opposite to UTF8::chr()
4670
     *
4671
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
4672
     *
4673
     * @param string $chr      <p>The character of which to calculate code point.</p>
4674
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
4675
     *
4676
     * @psalm-pure
4677
     *
4678
     * @return int
4679
     *             <p>Unicode code point of the given character,<br>
4680
     *             0 on invalid UTF-8 byte sequence</p>
4681
     */
4682 27
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * Memo of already resolved code points, keyed by "<character>_<encoding>".
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // the character => code point table is loaded lazily on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $code = \IntlChar::ord($chr);
            // "null" is the failure signal; the int 0 (U+0000) is a valid code point and must
            // not be mistaken for a failure by a loose truthiness check.
            if ($code !== null) {
                return $CHAR_CACHE[$cache_key] = $code;
            }
        }

        //
        // fallback via vanilla php
        //

        // At most 4 bytes are inspected; "unpack" returns them 1-indexed.
        /** @var int[] $bytes - "unpack": only false if the format string contains errors */
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $code = $bytes ? $bytes[1] : 0;

        // lead byte >= 0xF0 with 4 bytes available: 4-byte sequence
        if ($code >= 0xF0 && isset($bytes[4])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        }

        // lead byte >= 0xE0 with 3 bytes available: 3-byte sequence
        if ($code >= 0xE0 && isset($bytes[3])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        }

        // lead byte >= 0xC0 with 2 bytes available: 2-byte sequence
        if ($code >= 0xC0 && isset($bytes[2])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xC0) << 6) + $bytes[2] - 0x80);
        }

        // single byte (or empty input => 0)
        return $CHAR_CACHE[$cache_key] = $code;
    }
4751
4752
    /**
4753
     * Parses the string into an array (into the second parameter).
4754
     *
4755
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
4756
     *          if the second parameter is not set!
4757
     *
4758
     * EXAMPLE: <code>
4759
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
4760
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
4761
     * </code>
4762
     *
4763
     * @see http://php.net/manual/en/function.parse-str.php
4764
     *
4765
     * @param string $str        <p>The input string.</p>
4766
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
4767
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4768
     *
4769
     * @psalm-pure
4770
     *
4771
     * @return bool
4772
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
4773
     */
4774 2
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        // optionally strip non UTF-8 chars before parsing
        $input = $clean_utf8 ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($input, $result);

            // success means: at least one variable ended up in $result
            return $result !== [];
        }

        $parsed = \mb_parse_str($input, $result);

        // "mb_parse_str" additionally reports failure via its return value
        return $parsed !== false && $result !== [];
    }
4793
4794
    /**
4795
     * Checks if the "u" pattern modifier is available, which enables Unicode support in PCRE.
4796
     *
4797
     * @psalm-pure
4798
     *
4799
     * @return bool
4800
     *              <p>
4801
     *              <strong>true</strong> if support is available,<br>
4802
     *              <strong>false</strong> otherwise
4803
     *              </p>
4804
     */
4805
    public static function pcre_utf8_support(): bool
    {
        // Probe: an empty pattern with the "u" modifier against an empty subject.
        // "preg_match" yields 1 when the pattern was accepted and matched, false on failure;
        // the silence operator hides the warning of the failure case.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe_result = @\preg_match('//u', '');

        return $probe_result === 1;
    }
4810
4811
    /**
4812
     * Create an array containing a range of UTF-8 characters.
4813
     *
4814
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
4815
     *
4816
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
4817
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
4818
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
4819
     *                              "is_numeric"</p>
4820
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
4821
     * @param float|int  $step      [optional] <p>
4822
     *                              If a step value is given, it will be used as the
4823
     *                              increment between elements in the sequence. step
4824
     *                              should be given as a positive number. If not specified,
4825
     *                              step will default to 1.
4826
     *                              </p>
4827
     *
4828
     * @psalm-pure
4829
     *
4830
     * @return string[]
4831
     */
4832 2
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        // falsy boundaries (e.g. 0, "0", "") always yield an empty result
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // "ctype_*" functions must only see strings: an int between -128 and 255 is read as
        // the ASCII value of a single character (and passing non-strings is deprecated since
        // PHP 8.1), so e.g. the int 10 would be tested as "\n" instead of the digits "10".
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        $is_digit = false;
        $is_xdigit = false;

        // detect the notation of the boundaries and resolve the start code point
        if ($use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string)) {
            $is_digit = true;
            $start = (int) $var1;
        } elseif ($use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // anything else is treated as a character
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        // the end boundary is resolved with the notation detected above
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        // walk the code points (ascending or descending) and convert each one into its character
        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4904
4905
    /**
4906
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
4907
     *
4908
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
4909
     *
4910
     * e.g:
4911
     * 'test+test'                     => 'test+test'
4912
     * 'D&#252;sseldorf'               => 'Düsseldorf'
4913
     * 'D%FCsseldorf'                  => 'Düsseldorf'
4914
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
4915
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
4916
     * 'Düsseldorf'                   => 'Düsseldorf'
4917
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
4918
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
4919
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
4920
     *
4921
     * @param string $str          <p>The input string.</p>
4922
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
4923
     *
4924
     * @psalm-pure
4925
     *
4926
     * @return string
4927
     *                <p>The decoded URL, as a string.</p>
4928
     */
4929 6
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $decoded = self::urldecode_unicode_helper($str);

        // One pass = convert to UTF-8, decode HTML entities, then percent-decode.
        // With $multi_decode the pass is repeated until the result no longer changes,
        // otherwise it runs exactly once.
        do {
            $previous = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($previous),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $previous !== $decoded);

        return self::fix_simple_utf8($decoded);
    }
4965
4966
    /**
4967
     * Replaces all occurrences of $pattern in $str by $replacement.
4968
     *
4969
     * @param string $str         <p>The input string.</p>
4970
     * @param string $pattern     <p>The regular expression pattern.</p>
4971
     * @param string $replacement <p>The string to replace with.</p>
4972
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
4973
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
4974
     *
4975
     * @psalm-pure
4976
     *
4977
     * @return string
4978
     */
4979 18
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // Only the exact option string "msr" is rewritten to "ms".
        // NOTE(review): presumably "r" comes from callers using mb_ereg-style options - confirm.
        if ($options === 'msr') {
            $options = 'ms';
        }

        // fallback
        if (!$delimiter) {
            $delimiter = '/';
        }

        // PCRE bracket-style delimiters are closed by their counterpart, not by the opening
        // character itself; every other delimiter closes with the same character.
        $bracket_pairs = [
            '(' => ')',
            '[' => ']',
            '{' => '}',
            '<' => '>',
        ];
        $closing_delimiter = $bracket_pairs[$delimiter] ?? $delimiter;

        // the "u" (UTF-8) modifier is always applied; a failed replace yields an empty string
        return (string) \preg_replace(
            $delimiter . $pattern . $closing_delimiter . 'u' . $options,
            $replacement,
            $str
        );
    }
5001
5002
    /**
5003
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
5004
     *
5005
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
5006
     *
5007
     * @param string $str <p>The input string.</p>
5008
     *
5009
     * @psalm-pure
5010
     *
5011
     * @return string
5012
     *                <p>A string without UTF-BOM.</p>
5013
     */
5014 54
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Every entry of the BOM table (BOM bytes => byte length) is tested in turn against
        // the current - possibly already shortened - string.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, $bom_string, $bom_byte_length) !== 0) {
                continue;
            }

            /** @var false|string $remainder - needed for PhpStan (stubs error) */
            $remainder = \substr($str, $bom_byte_length, \strlen($str));
            if ($remainder === false) {
                return '';
            }

            $str = (string) $remainder;
        }

        return $str;
    }
5037
5038
    /**
5039
     * Removes duplicate occurrences of a string in another string.
5040
     *
5041
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
5042
     *
5043
     * @param string          $str  <p>The base string.</p>
5044
     * @param string|string[] $what <p>String to search for in the base string.</p>
5045
     *
5046
     * @psalm-pure
5047
     *
5048
     * @return string
5049
     *                <p>A string with removed duplicates.</p>
5050
     */
5051 2
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_array($what)) {
            // neither a string nor a list of strings: nothing to search for
            return $str;
        }

        foreach ($what as $item) {
            if ($item === '') {
                // an empty needle cannot occur "twice in a row"
                continue;
            }

            // The replacement is the back-reference "$1" (one captured copy of the needle)
            // rather than the raw needle: inside a replacement string "$n" and "\n" are
            // interpreted as back-references, so a needle such as '$0' would otherwise be
            // replaced by the whole match and its duplicates would never be removed.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
5068
5069
    /**
5070
     * Remove html via "strip_tags()" from the string.
5071
     *
5072
     * @param string $str            <p>The input string.</p>
5073
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
5074
     *                               should not be stripped. Default: null
5075
     *                               </p>
5076
     *
5077
     * @psalm-pure
5078
     *
5079
     * @return string
5080
     *                <p>A string without html tags.</p>
5081
     */
5082 6
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        // tags named in $allowable_tags (e.g. "<b><i>") are kept, all other tags are stripped
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5086
5087
    /**
5088
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
5089
     *
5090
     * @param string $str         <p>The input string.</p>
5091
     * @param string $replacement [optional] <p>Default is an empty string.</p>
5092
     *
5093
     * @psalm-pure
5094
     *
5095
     * @return string
5096
     *                <p>A string without breaks.</p>
5097
     */
5098 6
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // Alternatives, tried in this order:
        //   \r\n          - a Windows line break counts as ONE break (one replacement)
        //   \r | \n       - single carriage return / line feed
        //   <br\b[^>]*>   - <br>, <br/>, <br /> or <br ...attributes>, case-insensitive;
        //                   "\b" prevents other tags that merely start with "br" from matching
        // The former pattern began with a stray "/" (a left-over delimiter), which removed a
        // slash in front of "\r\n" and made a plain "\r\n" match twice.
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
5102
5103
    /**
5104
     * Remove invisible characters from a string.
5105
     *
5106
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
5107
     *
5108
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
5109
     *
5110
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
5111
     *
5112
     * @param string $str                           <p>The input string.</p>
5113
     * @param bool   $url_encoded                   [optional] <p>
5114
     *                                              Try to remove url encoded control character.
5115
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
5116
     *                                              <br>
5117
     *                                              Default: false
5118
     *                                              </p>
5119
     * @param string $replacement                   [optional] <p>The replacement character.</p>
5120
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
5121
     *
5122
     * @psalm-pure
5123
     *
5124
     * @return string
5125
     *                <p>A string without invisible chars.</p>
5126
     */
5127 92
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        // pure delegation: the string and all options are forwarded unchanged
        $visible = ASCII::remove_invisible_characters($str, $url_encoded, $replacement, $keep_basic_control_characters);

        return $visible;
    }
5140
5141
    /**
5142
     * Returns a new string with the prefix $substring removed, if present.
5143
     *
5144
     * @param string $str       <p>The input string.</p>
5145
     * @param string $substring <p>The prefix to remove.</p>
5146
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5147
     *
5148
     * @psalm-pure
5149
     *
5150
     * @return string
5151
     *                <p>A string without the prefix $substring.</p>
5152
     */
5153 12
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // The prefix is compared against '' explicitly: a truthiness test treats the prefix
        // "0" as "no prefix given" and would never remove it.
        // "strncmp" inspects only the first strlen($substring) bytes instead of searching
        // the whole string for the prefix.
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // cut off as many characters as the prefix is long
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5182
5183
    /**
5184
     * Returns a new string with the suffix $substring removed, if present.
5185
     *
5186
     * @param string $str
5187
     * @param string $substring <p>The suffix to remove.</p>
5188
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5189
     *
5190
     * @psalm-pure
5191
     *
5192
     * @return string
5193
     *                <p>A string having a $str without the suffix $substring.</p>
5194
     */
5195 12
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // The suffix is compared against '' explicitly: a truthiness test treats the suffix
        // "0" as "no suffix given" and would never remove it.
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // keep everything except the last mb_strlen($substring) characters
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5221
5222
    /**
5223
     * Replaces all occurrences of $search in $str by $replacement.
5224
     *
5225
     * @param string $str            <p>The input string.</p>
5226
     * @param string $search         <p>The needle to search for.</p>
5227
     * @param string $replacement    <p>The string to replace with.</p>
5228
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5229
     *
5230
     * @psalm-pure
5231
     *
5232
     * @return string
5233
     *                <p>A string with replaced parts.</p>
5234
     */
5235 29
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            // case-insensitive matching is delegated to the class' own "str_ireplace"
            return self::str_ireplace($search, $replacement, $str);
        }

        // case-sensitive matching: plain "str_replace"
        return \str_replace($search, $replacement, $str);
    }
5247
5248
    /**
5249
     * Replaces all occurrences of $search in $str by $replacement.
5250
     *
5251
     * @param string       $str            <p>The input string.</p>
5252
     * @param array        $search         <p>The elements to search for.</p>
5253
     * @param array|string $replacement    <p>The string to replace with.</p>
5254
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5255
     *
5256
     * @psalm-pure
5257
     *
5258
     * @return string
5259
     *                <p>A string with replaced parts.</p>
5260
     */
5261 30
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            // case-insensitive matching is delegated to the class' own "str_ireplace"
            return self::str_ireplace($search, $replacement, $str);
        }

        // case-sensitive matching: plain "str_replace" over all search elements
        return \str_replace($search, $replacement, $str);
    }
5273
5274
    /**
5275
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
5276
     *
5277
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
5278
     *
5279
     * @param string $str                        <p>The input string</p>
5280
     * @param string $replacement_char           <p>The replacement character.</p>
5281
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
5282
     *
5283
     * @psalm-pure
5284
     *
5285
     * @return string
5286
     *                <p>A string without diamond question marks (�).</p>
5287
     */
5288 35
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            // Invalid byte sequences are either dropped ("none") or first turned into U+FFFD;
            // the final "str_replace" then swaps every U+FFFD for the complete
            // $replacement_char. Handing "\ord($replacement_char)" to mbstring would use the
            // first BYTE only and corrupt multi-byte or multi-character replacements.
            $substitute = $replacement_char === '' ? 'none' : 0xFFFD;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();

            try {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
                @\mb_substitute_character($substitute);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // the substitute character is a global mbstring setting: always restore it
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD (the diamond question mark)
        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
5333
5334
    /**
5335
     * Strip whitespace or other characters from the end of a UTF-8 string.
5336
     *
5337
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
5338
     *
5339
     * @param string      $str   <p>The string to be trimmed.</p>
5340
     * @param string|null $chars <p>Optional characters to be stripped.</p>
5341
     *
5342
     * @psalm-pure
5343
     *
5344
     * @return string
5345
     *                <p>A string with unwanted characters stripped from the right.</p>
5346
     */
5347 21
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars === null) {
            // Default: strip trailing whitespace.
            $pattern = '[\\s]+$';
        } else {
            // "mb_ereg_replace()" patterns have no delimiter, the fallback
            // additionally escapes "/" for "self::regex_replace()".
            /** @noinspection PregQuoteUsageInspection */
            $chars = $has_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');

            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2.
            $pattern = "[{$chars}]+$";
        }

        if ($has_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5374
5375
    /**
5376
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
5377
     *
5378
     * @param bool $useEcho
5379
     *
5380
     * @psalm-pure
5381
     *
5382
     * @return string
5383
     */
5384 2
    public static function showSupport(bool $useEcho = true)
    {
        // Build one "<key> - <value>" line per entry of the support table.
        $html = '<pre>';

        // Iterate by value: nothing is modified here, and a by-reference loop
        // would leave a dangling reference into the static support array.
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        // Printing is optional, the generated markup is returned in any case.
        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5401
5402
    /**
5403
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
5404
     *
5405
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
5406
     *
5407
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
5408
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
5409
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
5410
     *
5411
     * @psalm-pure
5412
     *
5413
     * @return string
5414
     *                <p>The HTML numbered entity for the given character.</p>
5415
     */
5416 2
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        // An empty input has no entity representation.
        if ($char === '') {
            return '';
        }

        // Encode unless the caller asked to keep ASCII and the char is ASCII.
        if (!$keep_ascii_chars || !ASCII::is_ascii($char)) {
            return '&#' . self::ord($char, $encoding) . ';';
        }

        return $char;
    }
5435
5436
    /**
5437
     * @param string $str
5438
     * @param int    $tab_length
5439
     *
5440
     * @psalm-pure
5441
     *
5442
     * @return string
5443
     */
5444 5
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // A run of "$tab_length" spaces is what gets collapsed into one tab.
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5456
5457
    /**
5458
     * Returns a camelCase version of the string. Trims surrounding spaces,
5459
     * capitalizes letters following digits, spaces, dashes and underscores,
5460
     * and removes spaces, dashes, as well as underscores.
5461
     *
5462
     * @param string      $str                           <p>The input string.</p>
5463
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
5464
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
5465
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
5466
     *                                                   tr</p>
5467
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
5468
     *                                                   -> ß</p>
5469
     *
5470
     * @psalm-pure
5471
     *
5472
     * @return string
5473
     */
5474 32
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Lower-case the first char of the trimmed input, then drop leading dashes / underscores.
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // The plain "mb_*" functions are only used if no language specific handling is requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-case helper shared by both replacement steps.
         *
         * @param string $text
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $text, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($text, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($text)
                : \mb_strtoupper($text, $encoding);
        };

        // Step 1: remove separator runs and upper-case the char that follows them (if any).
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // Step 2: upper-case the char that follows a run of digits (the digits themselves are kept).
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5550
5551
    /**
5552
     * Returns the string with the first letter of each word capitalized,
5553
     * except for when the word is a name which shouldn't be capitalized.
5554
     *
5555
     * @param string $str
5556
     *
5557
     * @psalm-pure
5558
     *
5559
     * @return string
5560
     *                <p>A string with $str capitalized.</p>
5561
     */
5562 1
    public static function str_capitalize_name(string $str): string
    {
        // Normalize whitespace first, then run the helper once per delimiter:
        // first for space separated parts, then for dash separated parts.
        $collapsed = self::collapse_whitespace($str);
        $by_space = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($by_space, '-');
    }
5572
5573
    /**
5574
     * Returns true if the string contains $needle, false otherwise. By default
5575
     * the comparison is case-sensitive, but can be made insensitive by setting
5576
     * $case_sensitive to false.
5577
     *
5578
     * @param string $haystack       <p>The input string.</p>
5579
     * @param string $needle         <p>Substring to look for.</p>
5580
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5581
     *
5582
     * @psalm-pure
5583
     *
5584
     * @return bool
5585
     *              <p>Whether or not $haystack contains $needle.</p>
5586
     */
5587 21
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // Case-insensitive lookups go through the multi-byte aware search.
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        // Before PHP 8 there is no native "str_contains()".
        if (\PHP_VERSION_ID < 80000) {
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
5603
5604
    /**
5605
     * Returns true if the string contains all $needles, false otherwise. By
5606
     * default the comparison is case-sensitive, but can be made insensitive by
5607
     * setting $case_sensitive to false.
5608
     *
5609
     * @param string $haystack       <p>The input string.</p>
5610
     * @param array  $needles        <p>SubStrings to look for.</p>
5611
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5612
     *
5613
     * @psalm-pure
5614
     *
5615
     * @return bool
5616
     *              <p>Whether or not $haystack contains all of the $needles.</p>
5617
     */
5618 45
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        // An empty haystack or an empty needle list can never match.
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // Iterate by value, the needles are only read.
        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // Compare against '' explicitly: a truthiness check would wrongly
            // treat the valid needle "0" as empty.
            if ($needle === '') {
                return false;
            }

            // Run exactly one search per needle, depending on the requested mode.
            $found = $case_sensitive
                ? \strpos($haystack, $needle) !== false
                : \mb_stripos($haystack, $needle) !== false;

            if (!$found) {
                return false;
            }
        }

        return true;
    }
5641
5642
    /**
5643
     * Returns true if the string contains any $needles, false otherwise. By
5644
     * default the comparison is case-sensitive, but can be made insensitive by
5645
     * setting $case_sensitive to false.
5646
     *
5647
     * @param string $haystack       <p>The input string.</p>
5648
     * @param array  $needles        <p>SubStrings to look for.</p>
5649
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5650
     *
5651
     * @psalm-pure
5652
     *
5653
     * @return bool
5654
     *              <p>Whether or not $haystack contains any of the $needles.</p>
5655
     */
5656 46
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        // An empty haystack or an empty needle list can never match.
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // Iterate by value, the needles are only read.
        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // Skip empty needles; compare against '' explicitly so that the
            // valid needle "0" is not skipped by a truthiness check.
            if ($needle === '') {
                continue;
            }

            $found = $case_sensitive
                ? \strpos($haystack, $needle) !== false
                : \mb_stripos($haystack, $needle) !== false;

            // The first hit is enough.
            if ($found) {
                return true;
            }
        }

        return false;
    }
5685
5686
    /**
5687
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
5688
     * inserted before uppercase characters (with the exception of the first
5689
     * character of the string), and in place of spaces as well as underscores.
5690
     *
5691
     * @param string $str      <p>The input string.</p>
5692
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5693
     *
5694
     * @psalm-pure
5695
     *
5696
     * @return string
5697
     */
5698 19
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // Dasherizing is delimiting with a dash.
        $delimiter = '-';

        return self::str_delimit($str, $delimiter, $encoding);
    }
5702
5703
    /**
5704
     * Returns a lowercase and trimmed string separated by the given delimiter.
5705
     * Delimiters are inserted before uppercase characters (with the exception
5706
     * of the first character of the string), and in place of spaces, dashes,
5707
     * and underscores. Alpha delimiters are not converted to lowercase.
5708
     *
5709
     * @param string      $str                           <p>The input string.</p>
5710
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
5711
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
5712
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
5713
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
5714
     *                                                   tr</p>
5715
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
5716
     *                                                   ß</p>
5717
     *
5718
     * @psalm-pure
5719
     *
5720
     * @return string
5721
     */
5722 49
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Native "mbstring" uses the "mb_ereg_*" functions, otherwise PCRE in unicode mode is used.
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // Step 1: put a dash in front of every upper-case letter that is not at a word boundary.
        if ($has_mbstring) {
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // Step 2: lower-case, via plain "mb_strtolower()" if no special handling is requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        if ($use_mb_functions && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // Step 3: collapse runs of dashes, underscores and whitespace into the delimiter.
        if ($has_mbstring) {
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5754
5755
    /**
5756
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
5757
     *
5758
     * EXAMPLE: <code>
5759
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
5760
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
5761
     * </code>
5762
     *
5763
     * @param string $str <p>The input string.</p>
5764
     *
5765
     * @psalm-pure
5766
     *
5767
     * @return false|string
5768
     *                      <p>
5769
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
5770
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
5771
     *                      </p>
5772
     */
5773 30
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        // The strict binary check is only requested for strings without a BOM.
        $strict_binary_check = !self::string_has_bom($str);
        if (self::is_binary($str, $strict_binary_check)) {
            $utf32_type = self::is_utf32($str, false);
            if ($utf32_type === 1 || $utf32_type === 2) {
                return $utf32_type === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            $utf16_type = self::is_utf16($str, false);
            if ($utf16_type === 1 || $utf16_type === 2) {
                return $utf16_type === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            // Candidates in the order in which they are tried.
            $encoding_detecting_order = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $encoding_detecting_order, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        // The encoding list is loaded lazily on first use.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // A candidate is accepted if converting the string to the same
            // encoding gives back the string unchanged.
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5885
5886
    /**
5887
     * Check if the string ends with the given substring.
5888
     *
5889
     * EXAMPLE: <code>
5890
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
5891
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
5892
     * </code>
5893
     *
5894
     * @param string $haystack <p>The string to search in.</p>
5895
     * @param string $needle   <p>The substring to search for.</p>
5896
     *
5897
     * @psalm-pure
5898
     *
5899
     * @return bool
5900
     */
5901 9
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // Every string ends with the empty string ...
        if ($needle === '') {
            return true;
        }

        // ... but an empty string ends with nothing else.
        if ($haystack === '') {
            return false;
        }

        // Before PHP 8 the tail of the haystack is compared byte-wise by hand.
        if (\PHP_VERSION_ID < 80000) {
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
5918
5919
    /**
5920
     * Returns true if the string ends with any of $substrings, false otherwise.
5921
     *
5922
     * - case-sensitive
5923
     *
5924
     * @param string   $str        <p>The input string.</p>
5925
     * @param string[] $substrings <p>Substrings to look for.</p>
5926
     *
5927
     * @psalm-pure
5928
     *
5929
     * @return bool
5930
     *              <p>Whether or not $str ends with $substring.</p>
5931
     */
5932 7
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // Without candidates there is nothing to match.
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            // Byte-wise, case-sensitive comparison of the tail of the string.
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5946
5947
    /**
5948
     * Ensures that the string begins with $substring. If it doesn't, it's
5949
     * prepended.
5950
     *
5951
     * @param string $str       <p>The input string.</p>
5952
     * @param string $substring <p>The substring to add if not present.</p>
5953
     *
5954
     * @psalm-pure
5955
     *
5956
     * @return string
5957
     */
5958 10
    public static function str_ensure_left(string $str, string $substring): string
    {
        // Prepend unless the (non-empty) substring already sits at offset 0.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $substring . $str;
        }

        return $str;
    }
5970
5971
    /**
5972
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
5973
     *
5974
     * @param string $str       <p>The input string.</p>
5975
     * @param string $substring <p>The substring to add if not present.</p>
5976
     *
5977
     * @psalm-pure
5978
     *
5979
     * @return string
5980
     */
5981 10
    public static function str_ensure_right(string $str, string $substring): string
    {
        // The suffix only counts as present if both strings are non-empty
        // and the tail of "$str" equals "$substring" byte for byte.
        $already_present = $str !== ''
                           && $substring !== ''
                           && \substr($str, -\strlen($substring)) === $substring;

        return $already_present ? $str : $str . $substring;
    }
5995
5996
    /**
5997
     * Capitalizes the first word of the string, replaces underscores with
5998
     * spaces, and strips '_id'.
5999
     *
6000
     * @param string $str
6001
     *
6002
     * @psalm-pure
6003
     *
6004
     * @return string
6005
     */
6006 3
    public static function str_humanize($str): string
    {
        // First strip every "_id", then turn the remaining underscores into spaces.
        $without_id = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $without_id);

        // Trim and upper-case the first character.
        return self::ucfirst(\trim($spaced));
    }
6022
6023
    /**
6024
     * Check if the string ends with the given substring, case-insensitive.
6025
     *
6026
     * EXAMPLE: <code>
6027
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6028
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
6029
     * </code>
6030
     *
6031
     * @param string $haystack <p>The string to search in.</p>
6032
     * @param string $needle   <p>The substring to search for.</p>
6033
     *
6034
     * @psalm-pure
6035
     *
6036
     * @return bool
6037
     */
6038 12
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // Every string ends with the empty string ...
        if ($needle === '') {
            return true;
        }

        // ... but an empty string ends with nothing else.
        if ($haystack === '') {
            return false;
        }

        // Cut off as many trailing bytes as the needle has and compare them case-insensitively.
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6050
6051
    /**
6052
     * Returns true if the string ends with any of $substrings, false otherwise.
6053
     *
6054
     * - case-insensitive
6055
     *
6056
     * @param string   $str        <p>The input string.</p>
6057
     * @param string[] $substrings <p>Substrings to look for.</p>
6058
     *
6059
     * @psalm-pure
6060
     *
6061
     * @return bool
6062
     *              <p>Whether or not $str ends with $substring.</p>
6063
     */
6064 4
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // Without candidates there is nothing to match.
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            // The first case-insensitive suffix match wins.
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6078
6079
    /**
6080
     * Inserts $substring into the string at the $index provided.
6081
     *
6082
     * @param string $str       <p>The input string.</p>
6083
     * @param string $substring <p>String to be inserted.</p>
6084
     * @param int    $index     <p>The index at which to insert the substring.</p>
6085
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
6086
     *
6087
     * @psalm-pure
6088
     *
6089
     * @return string
6090
     */
6091 8
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // Fast path: plain "mb_*" calls for UTF-8.
        if ($encoding === 'UTF-8') {
            $length = (int) \mb_strlen($str);

            // An index behind the end of the string leaves the string untouched.
            if ($index > $length) {
                return $str;
            }

            /** @noinspection UnnecessaryCastingInspection */
            $head = (string) \mb_substr($str, 0, $index);
            /** @noinspection UnnecessaryCastingInspection */
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        // Any other encoding is normalized and handled by the wrappers of this class.
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
6120
6121
    /**
6122
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
6123
     *
6124
     * EXAMPLE: <code>
6125
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
6126
     * </code>
6127
     *
6128
     * @see http://php.net/manual/en/function.str-ireplace.php
6129
     *
6130
     * @param string|string[] $search      <p>
6131
     *                                     Every replacement with search array is
6132
     *                                     performed on the result of previous replacement.
6133
     *                                     </p>
6134
     * @param string|string[] $replacement <p>The replacement.</p>
6135
     * @param string|string[] $subject     <p>
6136
     *                                     If subject is an array, then the search and
6137
     *                                     replace is performed with every entry of
6138
     *                                     subject, and the return value is an array as
6139
     *                                     well.
6140
     *                                     </p>
6141
     * @param int             $count       [optional] <p>
6142
     *                                     The number of matched and replaced needles will
6143
     *                                     be returned in count which is passed by
6144
     *                                     reference.
6145
     *                                     </p>
6146
     *
6147
     * @psalm-pure
6148
     *
6149
     * @return string|string[]
6150
     *                         <p>A string or an array of replacements.</p>
6151
     *
6152
     * @template TStrIReplaceSubject
6153
     * @phpstan-param TStrIReplaceSubject $subject
6154
     * @phpstan-return TStrIReplaceSubject
6155
     */
6156 29
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Turn every search term into a case-insensitive unicode regex.
        $patterns = \array_map(
            /**
             * @param mixed $term
             *
             * @return string
             */
            static function ($term): string {
                $term = (string) $term;

                // An empty term gets a pattern that can never match.
                if ($term === '') {
                    return '/^(?<=.)$/';
                }

                return '/' . \preg_quote($term, '/') . '/ui';
            },
            (array) $search
        );

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }

        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $result
         */
        $result = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $result;
    }
6188
6189
    /**
6190
     * Replaces $search from the beginning of string with $replacement.
6191
     *
6192
     * @param string $str         <p>The input string.</p>
6193
     * @param string $search      <p>The string to search for.</p>
6194
     * @param string $replacement <p>The replacement.</p>
6195
     *
6196
     * @psalm-pure
6197
     *
6198
     * @return string
6199
     *                <p>The string after the replacement.</p>
6200
     */
6201 17
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        // Existing contract: an empty search appends the replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        // Compare character-wise with Unicode case folding. The byte-wise
        // strncasecmp() only folds ASCII letters, so e.g. "Κόσμε" / "κόσμε"
        // would never be recognised as the same prefix.
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        $prefix = (string) \mb_substr($str, 0, $search_length, 'UTF-8');

        // Only the prefix is searched, so a hit can only be at position 0.
        if (\mb_stripos($prefix, $search, 0, 'UTF-8') === 0) {
            return $replacement . \mb_substr($str, $search_length, null, 'UTF-8');
        }

        return $str;
    }
6224
6225
    /**
6226
     * Replaces $search from the ending of string with $replacement.
6227
     *
6228
     * @param string $str         <p>The input string.</p>
6229
     * @param string $search      <p>The string to search for.</p>
6230
     * @param string $replacement <p>The replacement.</p>
6231
     *
6232
     * @psalm-pure
6233
     *
6234
     * @return string
6235
     *                <p>The string after the replacement.</p>
6236
     */
6237 17
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        // Existing contract: an empty search appends the replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = (int) \mb_strlen($search, 'UTF-8');

        // A search longer than the string can not be its suffix. Bail out
        // early: the former stripos() call received a negative offset here,
        // which raises a ValueError on PHP 8 once it exceeds the string length.
        if ($search_length > (int) \mb_strlen($str, 'UTF-8')) {
            return $str;
        }

        // Compare the last characters with Unicode case folding (the byte-wise
        // stripos() only folds ASCII letters).
        $suffix = (string) \mb_substr($str, -$search_length, null, 'UTF-8');
        if (\mb_stripos($suffix, $search, 0, 'UTF-8') === 0) {
            return \mb_substr($str, 0, -$search_length, 'UTF-8') . $replacement;
        }

        return $str;
    }
6259
6260
    /**
6261
     * Check if the string starts with the given substring, case-insensitive.
6262
     *
6263
     * EXAMPLE: <code>
6264
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
6265
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
6266
     * </code>
6267
     *
6268
     * @param string $haystack <p>The string to search in.</p>
6269
     * @param string $needle   <p>The substring to search for.</p>
6270
     *
6271
     * @psalm-pure
6272
     *
6273
     * @return bool
6274
     */
6275 13
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // The empty needle is treated as a prefix of every string,
        // including the empty haystack.
        if ($needle === '') {
            return true;
        }

        // A non-empty needle needs a non-empty haystack and has to be found
        // (case-insensitively) right at position 0.
        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
6287
6288
    /**
6289
     * Returns true if the string begins with any of $substrings, false otherwise.
6290
     *
6291
     * - case-insensitive
6292
     *
6293
     * @param string $str        <p>The input string.</p>
6294
     * @param array  $substrings <p>Substrings to look for.</p>
6295
     *
6296
     * @psalm-pure
6297
     *
6298
     * @return bool
6299
     *              <p>Whether or not $str starts with $substring.</p>
6300
     */
6301 5
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // Nothing can match an empty string, and an empty list has no candidates.
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: the candidates are only read, so a by-reference
        // loop would merely force a copy of the array and leave a dangling
        // reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6319
6320
    /**
6321
     * Gets the substring after the first occurrence of a separator.
6322
     *
6323
     * @param string $str       <p>The input string.</p>
6324
     * @param string $separator <p>The string separator.</p>
6325
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6326
     *
6327
     * @psalm-pure
6328
     *
6329
     * @return string
6330
     */
6331 1
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the separator in the SAME encoding that is used below to cut
        // the string; otherwise the character offset may not line up.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Fast path for the default encoding.
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        // Skip the separator itself and return everything behind it.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6359
6360
    /**
6361
     * Gets the substring after the last occurrence of a separator.
6362
     *
6363
     * @param string $str       <p>The input string.</p>
6364
     * @param string $separator <p>The string separator.</p>
6365
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6366
     *
6367
     * @psalm-pure
6368
     *
6369
     * @return string
6370
     */
6371 1
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search the last occurrence in the SAME encoding that is used below
        // to cut the string; otherwise the character offset may not line up.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Fast path for the default encoding.
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        // Skip the separator itself and return everything behind it.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6399
6400
    /**
6401
     * Gets the substring before the first occurrence of a separator.
6402
     *
6403
     * @param string $str       <p>The input string.</p>
6404
     * @param string $separator <p>The string separator.</p>
6405
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6406
     *
6407
     * @psalm-pure
6408
     *
6409
     * @return string
6410
     */
6411 1
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the separator in the SAME encoding that is used below to cut
        // the string; otherwise the character offset may not line up.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Fast path for the default encoding.
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        // Everything in front of the separator.
        return (string) self::substr($str, 0, $offset, $encoding);
    }
6431
6432
    /**
6433
     * Gets the substring before the last occurrence of a separator.
6434
     *
6435
     * @param string $str       <p>The input string.</p>
6436
     * @param string $separator <p>The string separator.</p>
6437
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6438
     *
6439
     * @psalm-pure
6440
     *
6441
     * @return string
6442
     */
6443 1
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Case-insensitive position of the last separator occurrence;
        // the native mb_* function is used for the default encoding.
        $position = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        // Without a separator there is no "before" part.
        if ($position === false) {
            return '';
        }

        // Everything in front of the last separator.
        return $is_utf8
            ? (string) \mb_substr($str, 0, $position)
            : (string) self::substr($str, 0, $position, $encoding);
    }
6468
6469
    /**
6470
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
6471
     *
6472
     * @param string $str           <p>The input string.</p>
6473
     * @param string $needle        <p>The string to look for.</p>
6474
     * @param bool   $before_needle [optional] <p>Default: false</p>
6475
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6476
     *
6477
     * @psalm-pure
6478
     *
6479
     * @return string
6480
     */
6481 2
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search in, or nothing to search for.
        if ($str === '' || $needle === '') {
            return '';
        }

        // Delegate to the case-insensitive strstr() variant; it yields false
        // when the needle does not occur, which is mapped to an empty string.
        $found = self::stristr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6507
6508
    /**
6509
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
6510
     *
6511
     * @param string $str           <p>The input string.</p>
6512
     * @param string $needle        <p>The string to look for.</p>
6513
     * @param bool   $before_needle [optional] <p>Default: false</p>
6514
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6515
     *
6516
     * @psalm-pure
6517
     *
6518
     * @return string
6519
     */
6520 1
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search in, or nothing to search for.
        if ($str === '' || $needle === '') {
            return '';
        }

        // Delegate to the case-insensitive strrchr() variant; it yields false
        // when the needle does not occur, which is mapped to an empty string.
        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6546
6547
    /**
6548
     * Returns the last $n characters of the string.
6549
     *
6550
     * @param string $str      <p>The input string.</p>
6551
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
6552
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6553
     *
6554
     * @psalm-pure
6555
     *
6556
     * @return string
6557
     */
6558 12
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // No characters requested, or none available.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // Any other charset: normalize its name first, then use the
        // encoding-aware substr() with a negative start.
        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized_encoding);
        }

        // Default encoding: a negative start counts from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
6575
6576
    /**
6577
     * Limit the number of characters in a string.
6578
     *
6579
     * @param string $str        <p>The input string.</p>
6580
     * @param int    $length     [optional] <p>Default: 100</p>
6581
     * @param string $str_add_on [optional] <p>Default: …</p>
6582
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6583
     *
6584
     * @psalm-pure
6585
     *
6586
     * @return string
6587
     */
6588 2
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough: return untouched (no add-on).
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Room left for the text once the add-on is accounted for. Never
            // negative: a negative substr length would cut from the END of the
            // string and return far more than $length characters.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the string itself.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6615
6616
    /**
6617
     * Limit the number of characters in a string, but also after the next word.
6618
     *
6619
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
6620
     *
6621
     * @param string $str        <p>The input string.</p>
6622
     * @param int    $length     [optional] <p>Default: 100</p>
6623
     * @param string $str_add_on [optional] <p>Default: …</p>
6624
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6625
     *
6626
     * @psalm-pure
6627
     *
6628
     * @return string
6629
     */
6630 6
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        // The default encoding goes straight to the native mb_* functions,
        // every other charset through the encoding-aware class helpers.
        $is_utf8 = $encoding === 'UTF-8';

        // Short enough: return the input untouched (no add-on).
        $total_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($total_length <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut right before it.
        $char_at_limit = $is_utf8
            ? \mb_substr($str, $length - 1, 1)
            : self::substr($str, $length - 1, 1, $encoding);
        if ($char_at_limit === ' ') {
            $head = $is_utf8
                ? \mb_substr($str, 0, $length - 1)
                : self::substr($str, 0, $length - 1, $encoding);

            return ((string) $head) . $str_add_on;
        }

        // Hard cut at the limit first ...
        if ($is_utf8) {
            $str = \mb_substr($str, 0, $length);
        } else {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = self::substr($str, 0, $length, $encoding);
            if ($str === false) {
                return '' . $str_add_on;
            }
        }

        // ... then drop the last (possibly incomplete) word.
        $words = \explode(' ', $str, -1);
        $new_str = \implode(' ', $words);

        // No space inside the cut: fall back to a plain character cut.
        if ($new_str === '') {
            $head = $is_utf8
                ? \mb_substr($str, 0, $length - 1)
                : self::substr($str, 0, $length - 1, $encoding);

            return ((string) $head) . $str_add_on;
        }

        return $new_str . $str_add_on;
    }
6682
6683
    /**
6684
     * Returns the longest common prefix between the $str1 and $str2.
6685
     *
6686
     * @param string $str1     <p>The input string.</p>
6687
     * @param string $str2     <p>Second string for comparison.</p>
6688
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6689
     *
6690
     * @psalm-pure
6691
     *
6692
     * @return string
6693
     */
6694 10
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        // Decide once which function family is used: native mb_* for the
        // default encoding, the encoding-aware class helpers otherwise.
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $limit = (int) \min(
                \mb_strlen($str1),
                \mb_strlen($str2)
            );
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );
        }

        // Walk both strings character by character from the start and stop at
        // the first position where they differ.
        $prefix = '';
        for ($pos = 0; $pos < $limit; ++$pos) {
            $left = $is_utf8
                ? \mb_substr($str1, $pos, 1)
                : self::substr($str1, $pos, 1, $encoding);
            if ($left === false) {
                break;
            }

            $right = $is_utf8
                ? \mb_substr($str2, $pos, 1)
                : self::substr($str2, $pos, 1, $encoding);
            if ($left !== $right) {
                break;
            }

            $prefix .= $left;
        }

        return $prefix;
    }
6746
6747
    /**
6748
     * Returns the longest common substring between the $str1 and $str2.
6749
     * In the case of ties, it returns that which occurs first.
6750
     *
6751
     * @param string $str1
6752
     * @param string $str2     <p>Second string for comparison.</p>
6753
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6754
     *
6755
     * @psalm-pure
6756
     *
6757
     * @return string
6758
     *                <p>A string with its $str being the longest common substring.</p>
6759
     */
6760 11
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Checked AFTER normalization: an alias of UTF-8 ends up on the native path.
        $use_mb = $encoding === 'UTF-8';

        // Split both strings into characters ONCE. Extracting a character is
        // loop-invariant work that must not be repeated for every cell of the
        // comparison table.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $use_mb
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $use_mb
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        // $len = length of the best match so far, $end = position in $str1
        // right behind that match.
        $len = 0;
        $end = 0;

        // Only the previous row of the table is ever read, so two rolling rows
        // replace the full ($str_length + 1) x ($other_length + 1) matrix.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $current_row = [0];
            $str_char = $str_chars[$i - 1];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    // Extend the common run that ended one character earlier in both strings.
                    $run = $previous_row[$j - 1] + 1;

                    // Strictly greater: on ties the first occurrence wins.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6837
6838
    /**
6839
     * Returns the longest common suffix between the $str1 and $str2.
6840
     *
6841
     * @param string $str1
6842
     * @param string $str2     <p>Second string for comparison.</p>
6843
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6844
     *
6845
     * @psalm-pure
6846
     *
6847
     * @return string
6848
     */
6849 10
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str2 === '' || $str1 === '') {
            return '';
        }

        // Decide once which function family is used: native mb_* for the
        // default encoding, the encoding-aware class helpers otherwise.
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );
        }

        // Walk both strings backwards (negative offsets count from the end)
        // and stop at the first position where they differ.
        $suffix = '';
        for ($back = 1; $back <= $limit; ++$back) {
            $left = $is_utf8
                ? \mb_substr($str1, -$back, 1)
                : self::substr($str1, -$back, 1, $encoding);
            if ($left === false) {
                break;
            }

            $right = $is_utf8
                ? \mb_substr($str2, -$back, 1)
                : self::substr($str2, -$back, 1, $encoding);
            if ($left !== $right) {
                break;
            }

            // Characters are collected back to front, hence prepend.
            $suffix = $left . $suffix;
        }

        return $suffix;
    }
6904
6905
    /**
6906
     * Returns true if $str matches the supplied pattern, false otherwise.
6907
     *
6908
     * @param string $str     <p>The input string.</p>
6909
     * @param string $pattern <p>Regex pattern to match against.</p>
6910
     *
6911
     * @psalm-pure
6912
     *
6913
     * @return bool
6914
     *              <p>Whether or not $str matches the pattern.</p>
6915
     */
6916 10
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // The pattern is wrapped in "/" delimiters below, so a bare "/" inside
        // it would end the regex early (warning + "no match"). Escape every "/"
        // that is not escaped yet, i.e. that is preceded by an even number of
        // backslashes; an already escaped "\/" is left alone.
        $escaped_pattern = \preg_replace('~(?<!\\\\)((?:\\\\\\\\)*)/~', '$1\\\\/', $pattern);
        if ($escaped_pattern === null) {
            // Escaping failed (e.g. PCRE limits): fall back to the raw pattern.
            $escaped_pattern = $pattern;
        }

        // preg_match() returns 1 on a match, 0 on no match and false on error;
        // everything but a match is reported as false.
        return (bool) \preg_match('/' . $escaped_pattern . '/u', $str);
    }
6920
6921
    /**
6922
     * Returns whether or not a character exists at an index. Offsets may be
6923
     * negative to count from the last character in the string. Implements
6924
     * part of the ArrayAccess interface.
6925
     *
6926
     * @param string $str      <p>The input string.</p>
6927
     * @param int    $offset   <p>The index to check.</p>
6928
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6929
     *
6930
     * @psalm-pure
6931
     *
6932
     * @return bool
6933
     *              <p>Whether or not the index exists.</p>
6934
     */
6935 6
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Non-negative offsets are zero-based from the start, negative offsets
        // are one-based from the end (-1 is the last character).
        return $offset >= 0
            ? $char_count > $offset
            : $char_count >= \abs($offset);
    }
6946
6947
    /**
6948
     * Returns the character at the given index. Offsets may be negative to
6949
     * count from the last character in the string. Implements part of the
6950
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
6951
     * does not exist.
6952
     *
6953
     * @param string $str      <p>The input string.</p>
6954
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
6955
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6956
     *
6957
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
6958
     *
6959
     * @return string
6960
     *                <p>The character at the specified index.</p>
6961
     *
6962
     * @psalm-pure
6963
     */
6964 2
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the requested encoding, the same one that
        // char_at() uses below, so the bounds check and the lookup agree.
        $length = (int) self::strlen($str, $encoding);

        // Non-negative indexes are zero-based from the start, negative indexes
        // are one-based from the end (-1 is the last character).
        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6979
6980
    /**
6981
     * Pad a UTF-8 string to a given length with another string.
6982
     *
6983
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
6984
     *
6985
     * @param string     $str        <p>The input string.</p>
6986
     * @param int        $pad_length <p>The length of return string.</p>
6987
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
6988
     * @param int|string $pad_type   [optional] <p>
6989
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
6990
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
6991
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
6992
     *                               </p>
6993
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
6994
     *
6995
     * @psalm-pure
6996
     *
6997
     * @return string
6998
     *                <p>Returns the padded string.</p>
6999
     */
7000 41
    public static function str_pad(
7001
        string $str,
7002
        int $pad_length,
7003
        string $pad_string = ' ',
7004
        $pad_type = \STR_PAD_RIGHT,
7005
        string $encoding = 'UTF-8'
7006
    ): string {
7007 41
        if ($pad_length === 0 || $pad_string === '') {
7008 1
            return $str;
7009
        }
7010
7011 41
        if ($pad_type !== (int) $pad_type) {
7012 13
            if ($pad_type === 'left') {
7013 3
                $pad_type = \STR_PAD_LEFT;
7014 10
            } elseif ($pad_type === 'right') {
7015 6
                $pad_type = \STR_PAD_RIGHT;
7016 4
            } elseif ($pad_type === 'both') {
7017 3
                $pad_type = \STR_PAD_BOTH;
7018
            } else {
7019 1
                throw new \InvalidArgumentException(
7020 1
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
7021
                );
7022
            }
7023
        }
7024
7025 40
        if ($encoding === 'UTF-8') {
7026 25
            $str_length = (int) \mb_strlen($str);
7027
7028 25
            if ($pad_length >= $str_length) {
7029 25
                switch ($pad_type) {
7030
                    case \STR_PAD_LEFT:
7031 8
                        $ps_length = (int) \mb_strlen($pad_string);
7032
7033 8
                        $diff = ($pad_length - $str_length);
7034
7035 8
                        $pre = (string) \mb_substr(
7036 8
                            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7037 8
                            0,
7038 8
                            $diff
7039
                        );
7040 8
                        $post = '';
7041
7042 8
                        break;
7043
7044
                    case \STR_PAD_BOTH:
7045 14
                        $diff = ($pad_length - $str_length);
7046
7047 14
                        $ps_length_left = (int) \floor($diff / 2);
7048
7049 14
                        $ps_length_right = (int) \ceil($diff / 2);
7050
7051 14
                        $pre = (string) \mb_substr(
7052 14
                            \str_repeat($pad_string, $ps_length_left),
7053 14
                            0,
7054 14
                            $ps_length_left
7055
                        );
7056 14
                        $post = (string) \mb_substr(
7057 14
                            \str_repeat($pad_string, $ps_length_right),
7058 14
                            0,
7059 14
                            $ps_length_right
7060
                        );
7061
7062 14
                        break;
7063
7064
                    case \STR_PAD_RIGHT:
7065
                    default:
7066 9
                        $ps_length = (int) \mb_strlen($pad_string);
7067
7068 9
                        $diff = ($pad_length - $str_length);
7069
7070 9
                        $post = (string) \mb_substr(
7071 9
                            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7072 9
                            0,
7073 9
                            $diff
7074
                        );
7075 9
                        $pre = '';
7076
                }
7077
7078 25
                return $pre . $str . $post;
7079
            }
7080
7081 3
            return $str;
7082
        }
7083
7084 15
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
7085
7086 15
        $str_length = (int) self::strlen($str, $encoding);
7087
7088 15
        if ($pad_length >= $str_length) {
7089 14
            switch ($pad_type) {
7090
                case \STR_PAD_LEFT:
7091 5
                    $ps_length = (int) self::strlen($pad_string, $encoding);
7092
7093 5
                    $diff = ($pad_length - $str_length);
7094
7095 5
                    $pre = (string) self::substr(
7096 5
                        \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7097 5
                        0,
7098
                        $diff,
7099
                        $encoding
7100
                    );
7101 5
                    $post = '';
7102
7103 5
                    break;
7104
7105
                case \STR_PAD_BOTH:
7106 3
                    $diff = ($pad_length - $str_length);
7107
7108 3
                    $ps_length_left = (int) \floor($diff / 2);
7109
7110 3
                    $ps_length_right = (int) \ceil($diff / 2);
7111
7112 3
                    $pre = (string) self::substr(
7113 3
                        \str_repeat($pad_string, $ps_length_left),
7114 3
                        0,
7115
                        $ps_length_left,
7116
                        $encoding
7117
                    );
7118 3
                    $post = (string) self::substr(
7119 3
                        \str_repeat($pad_string, $ps_length_right),
7120 3
                        0,
7121
                        $ps_length_right,
7122
                        $encoding
7123
                    );
7124
7125 3
                    break;
7126
7127
                case \STR_PAD_RIGHT:
7128
                default:
7129 6
                    $ps_length = (int) self::strlen($pad_string, $encoding);
7130
7131 6
                    $diff = ($pad_length - $str_length);
7132
7133 6
                    $post = (string) self::substr(
7134 6
                        \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7135 6
                        0,
7136
                        $diff,
7137
                        $encoding
7138
                    );
7139 6
                    $pre = '';
7140
            }
7141
7142 14
            return $pre . $str . $post;
7143
        }
7144
7145 1
        return $str;
7146
    }
7147
7148
    /**
     * Pads the string on both sides until it reaches the requested length.
     * Shortcut for "UTF8::str_pad()" called with \STR_PAD_BOTH as pad type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7176
7177
    /**
     * Pads the beginning of the string until it reaches the requested length.
     * Shortcut for "UTF8::str_pad()" called with \STR_PAD_LEFT as pad type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7205
7206
    /**
     * Pads the end of the string until it reaches the requested length.
     * Shortcut for "UTF8::str_pad()" called with \STR_PAD_RIGHT as pad type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7234
7235
    /**
     * Repeat a string.
     *
     * The input is first passed through "UTF8::filter()" and the filtered
     * result is then handed to the native "\str_repeat()".
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           How many times the input string should be repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           A multiplier of 0 yields an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7264
7265
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
     *
     * Replaces every occurrence of the search value with the replacement value.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The needle to look for. Pass an array to search
     *                                 for several needles at once.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The value that replaces each found needle. Pass an
     *                                 array to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The haystack: a string or an array of strings
     *                                 to search and replace in.
     *                                 </p>
     *                                 <p>
     *                                 When an array is given, every entry is processed
     *                                 and an array is returned.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, receives the number of replacements performed.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7324
7325
    /**
     * Replaces $search with $replacement, but only when $str starts with $search.
     *
     * Special case: an empty $search never "matches" a prefix; instead the
     * replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle simply appends the replacement (this also covers
        // an empty input string, where the result is the replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_length = \strlen($search);

        // Byte-wise prefix comparison; anything else leaves the input untouched
        // (including an empty or too short input string).
        if (\strncmp($str, $search, $prefix_length) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $prefix_length);
    }
7363
7364
    /**
     * Replaces $search with $replacement, but only when $str ends with $search.
     *
     * Special case: an empty $search never "matches" a suffix; instead the
     * replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle simply appends the replacement (this also covers
        // an empty input string, where the result is the replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // A needle longer than the haystack can never be a suffix. Checking this
        // up-front avoids handing an out-of-range negative offset to a string
        // function (which throws a ValueError on PHP 8 and warns on PHP 7).
        if ($search_length > \strlen($str)) {
            return $str;
        }

        // Byte-wise suffix comparison.
        if (\substr($str, -$search_length) !== $search) {
            return $str;
        }

        // The cast guards against "false" from \substr() on old PHP versions
        // when the needle covers the whole string.
        return (string) \substr($str, 0, -$search_length) . $replacement;
    }
7401
7402
    /**
     * Replace only the first occurrence of "$search" in "$subject" with "$replace".
     *
     * The position is looked up via "UTF8::strpos()"; when nothing is found
     * the subject is returned unchanged.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement for the first match.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);

        if ($first_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, (int) self::strlen($search));
    }
7436
7437
    /**
     * Replace only the last occurrence of "$search" in "$subject" with "$replace".
     *
     * The position is looked up via "UTF8::strrpos()"; when nothing is found
     * the subject is returned unchanged.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement for the last match.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);

        if ($last_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, (int) self::strlen($search));
    }
7470
7471
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Fast path: plain "mb_*" calls for UTF-8, otherwise the encoding-aware helpers.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $char_count = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // Shuffle the character positions, then rebuild the string in that order.
        $positions = \range(0, $char_count - 1);
        \shuffle($positions);

        $shuffled_str = '';
        foreach ($positions as $position) {
            $char = $is_utf8
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($char !== false) {
                $shuffled_str .= $char;
            }
        }

        return $shuffled_str;
    }
7518
7519
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative values for $start or $end are counted
     * from the end of the string.
     *
     * If the resolved end index is not behind the resolved start index, an
     * empty string is returned.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // Fast path: plain "mb_*" calls for UTF-8, otherwise the encoding-aware helpers.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end === null) {
            // No end given: ask for "up to the full length", i.e. the rest of the string.
            $length = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);
        } else {
            // Resolve negative indexes to absolute positions first. Computing the
            // length from unresolved values could produce a negative length, which
            // the substring functions interpret as "omit from the end" and would
            // return characters although the end lies before the start.
            if ($start < 0 || $end < 0) {
                $str_length = $is_utf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);

                if ($start < 0) {
                    $start = (int) \max(0, $str_length + $start);
                }

                if ($end < 0) {
                    $end = (int) \max(0, $str_length + $end);
                }
            }

            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7570
7571
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized and "-" becomes "_"; every number
     * character and every uppercase letter gets a "_" prefix (uppercase letters
     * are lowercased, plain digits are additionally followed by "_"); remaining
     * whitespace becomes "_"; runs of "_" are collapsed and leading / trailing
     * "_" are trimmed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // NOTE: no "|" inside the character class - within "[...]" it is a
            // literal pipe, not an alternation, and would wrongly match "|" in the input.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // plain decimal digit: isolate it with underscores on both sides
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                // everything else (uppercase letters, other number chars): "_" + lowercase
                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',        // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',           // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7640
7641
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        /** @var int[] $code_points */
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicate values
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7672
7673
    /**
     * Convert every entry of an array into an array of Unicode characters.
     *
     * Each entry is split via "UTF8::str_split()"; the keys of the input
     * array are kept.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        /** @var string[][] $chunks_per_entry */
        $chunks_per_entry = [];

        foreach ($input as $key => $entry) {
            $chunks_per_entry[$key] = self::str_split(
                $entry,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        return $chunks_per_entry;
    }
7711
7712
    /**
     * Convert a string to an array of unicode characters.
     *
     * Strategy, in order of preference: "mb_str_split()" (if available),
     * "mb_substr()" / PCRE when mbstring is supported, PCRE alone when UTF-8
     * aware PCRE is supported, and finally a manual byte-wise UTF-8 decoder.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input.</p>
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // legacy fallback: array input is delegated to "str_split_array()"
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /** @psalm-suppress InvalidReturnStatement */
            /** @phpstan-ignore-next-line - old code :/ */
            return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        $input = (string) $input;
        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        $use_mbstring = $try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true;

        if ($use_mbstring) {
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $mb_chunks = \mb_str_split($input, $length);
                if ($mb_chunks !== false) {
                    // already chunked by $length, nothing left to do
                    return $mb_chunks;
                }
            }

            $char_count = \mb_strlen($input);
            if ($char_count <= 127) {
                // short input: pick the characters one by one
                $chars = [];
                for ($offset = 0; $offset < $char_count; ++$offset) {
                    $chars[] = \mb_substr($input, $offset, 1);
                }
            } else {
                $matches = [];
                \preg_match_all('/./us', $input, $matches);
                $chars = $matches[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $matches = [];
            \preg_match_all('/./us', $input, $matches);
            $chars = $matches[0] ?? [];
        } else {
            // fallback: walk the bytes and assemble 1- to 4-byte sequences by hand;
            // a lead byte without the expected continuation bytes is skipped
            $chars = [];
            $byte_count = \strlen($input);
            $cursor = 0;

            while ($cursor < $byte_count) {
                $lead = $input[$cursor];
                $consumed = 1;

                if (($lead & "\x80") === "\x00") {
                    // single byte (ASCII)
                    $chars[] = $lead;
                } elseif (
                    isset($input[$cursor + 1])
                    &&
                    ($lead & "\xE0") === "\xC0"
                ) {
                    // two-byte sequence
                    if (($input[$cursor + 1] & "\xC0") === "\x80") {
                        $chars[] = \substr($input, $cursor, 2);
                        $consumed = 2;
                    }
                } elseif (
                    isset($input[$cursor + 2])
                    &&
                    ($lead & "\xF0") === "\xE0"
                ) {
                    // three-byte sequence
                    if (
                        ($input[$cursor + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$cursor + 2] & "\xC0") === "\x80"
                    ) {
                        $chars[] = \substr($input, $cursor, 3);
                        $consumed = 3;
                    }
                } elseif (
                    isset($input[$cursor + 3])
                    &&
                    ($lead & "\xF8") === "\xF0"
                ) {
                    // four-byte sequence
                    if (
                        ($input[$cursor + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$cursor + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$cursor + 3] & "\xC0") === "\x80"
                    ) {
                        $chars[] = \substr($input, $cursor, 4);
                        $consumed = 4;
                    }
                }

                $cursor += $consumed;
            }
        }

        if ($length > 1) {
            // glue single characters together into chunks of $length
            $chunks = [];
            foreach (\array_chunk($chars, $length) as $chunk) {
                $chunks[] = \implode('', $chunk);
            }

            return $chunks;
        }

        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }
7865
7866
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * With mbstring support the pattern is handed to "mb_split()"; without it
     * the pattern is quoted via "preg_quote()" and used with "preg_split()".
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $parts = \mb_split($pattern, $str);
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first $limit parts
            if ($limit > 0) {
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        // Ask for one part more than wanted: that extra part collects the
        // unsplit remainder and is dropped again below.
        $preg_limit = $limit > 0 ? $limit + 1 : -1;

        $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $preg_limit);
        if ($parts === false) {
            return [];
        }

        if ($preg_limit > 0 && \count($parts) === $preg_limit) {
            \array_pop($parts);
        }

        return $parts;
    }
7935
7936
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty needle always
     * matches; otherwise an empty haystack never matches.
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // no native helper before PHP 8: compare the first strlen($needle) bytes
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
7968
7969
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with at least one of the $substrings;
     *              always false for an empty $str or an empty list.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: the list is only read, so a reference would merely force
        // a copy of the array and leave a dangling reference behind
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
8000
8001
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything behind the first separator, or an empty string if the
     *                input / separator is empty or the separator does not occur.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // fast path: plain mbstring calls for the default encoding
        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use self::substr() like the other "*_separator" helpers, so that the offset
        // computed by self::strpos() / self::strlen() and the final cut go through the
        // same encoding handling instead of a raw \mb_substr() call
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8042
8043
    /**
     * Returns the part of the string that follows the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trailing part, or an empty string if the input / separator is
     *                empty or the separator was not found.</p>
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $last_pos = \mb_strrpos($str, $separator);

            return $last_pos === false
                ? ''
                : (string) \mb_substr($str, $last_pos + (int) \mb_strlen($separator));
        }

        $last_pos = self::strrpos($str, $separator, 0, $encoding);

        return $last_pos === false
            ? ''
            : (string) self::substr(
                $str,
                $last_pos + (int) self::strlen($separator, $encoding),
                null,
                $encoding
            );
    }
8087
8088
    /**
     * Returns the part of the string that precedes the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The leading part, or an empty string if the input / separator is
     *                empty or the separator was not found.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $first_pos = \mb_strpos($str, $separator);

            return $first_pos === false
                ? ''
                : (string) \mb_substr($str, 0, $first_pos);
        }

        $first_pos = self::strpos($str, $separator, 0, $encoding);

        return $first_pos === false
            ? ''
            : (string) self::substr($str, 0, $first_pos, $encoding);
    }
8133
8134
    /**
     * Returns the part of the string that precedes the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The leading part, or an empty string if the input / separator is
     *                empty or the separator was not found.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $last_pos = \mb_strrpos($str, $separator);

            return $last_pos === false
                ? ''
                : (string) \mb_substr($str, 0, $last_pos);
        }

        $last_pos = self::strrpos($str, $separator, 0, $encoding);
        if ($last_pos === false) {
            return '';
        }

        // the encoding is normalized only once a match is known, right before the cut
        return (string) self::substr(
            $str,
            0,
            $last_pos,
            self::normalize_encoding($encoding, 'UTF-8')
        );
    }
8178
8179
    /**
     * Returns the part of the string starting at the first occurrence of "$needle",
     * or - with "$before_needle" - the part in front of that occurrence.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The requested part, or an empty string if the input / needle is
     *                empty or the needle was not found.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mb_strstr() itself defaults "$before_needle" to false, so the flag is passed through as-is
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8225
8226
    /**
     * Returns the part of the string starting at the last occurrence of "$needle",
     * or - with "$before_needle" - the part in front of that occurrence.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The requested part, or an empty string if the input / needle is
     *                empty or the needle was not found.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mb_strrchr() itself defaults "$before_needle" to false, so the flag is passed through as-is
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8272
8273
    /**
     * Wraps $str in the given substring, i.e. puts it in front of and behind the string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
8288
8289
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that are treated like
     *                                                           whitespace, i.e. as separators between words.
     *                                                           null or '' === whitespace only.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are sufficient as long as no language specific
        // casing and no length preservation is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // compare explicitly against null / '' - a plain truthiness check would
        // silently discard the perfectly valid separator string "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        // a "word" is every run of chars that is neither whitespace nor one of the extra separators
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // words from the ignore list are kept exactly as they are
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8384
8385
    /**
     * Convert a string into an obfuscated string by replacing a random selection of its chars.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the chars that should be processed (0.0 - 1.0);
     *                                values outside of that range are clamped.</p>
     * @param string   $obfuscateChar <p>The replacement char.</p>
     * @param string[] $keepChars     <p>Chars that are never replaced (they still count as processed).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscated string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // hide already existing occurrences of the obfuscate char behind a helper char,
        // they are restored at the very end
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);

        // clamp the target to the number of available chars: with a percentage above 1.0
        // the counter below could never reach the target and the loop would run forever
        $charsMaxChange = \min((float) $charsMax, \max(0.0, \round($charsMax * $percent)));
        $charsCounter = 0;
        $charKeyDone = [];

        // keep passing over the chars until enough of them have been picked
        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // coin flip: roughly every second remaining char is skipped in this pass
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // protected chars use up a slot but stay readable
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
8451
8452
    /**
     * Returns a trimmed string in proper title case.
     *
     * Small words (a, an, and, of, ...) stay lower-case unless they open or close the
     * title or a sub-phrase; URLs, e-mail addresses, file paths and words with internal
     * capitals are left untouched. The entries of $ignore are added to the list of
     * small words.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // the built-in small words (regex fragments) followed by the caller supplied ones
        $small_words_rx = \implode(
            '|',
            \array_merge(
                [
                    '(?<!q&)a',
                    'an',
                    'and',
                    'as',
                    'at(?!&t)',
                    'but',
                    'by',
                    'en',
                    'for',
                    'if',
                    'in',
                    'of',
                    'on',
                    'or',
                    'the',
                    'to',
                    'v[.]?',
                    'via',
                    'vs[.]?',
                ],
                $ignore
            )
        );
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Upper-cases the first char of the 1st capture group (== the whole match).
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_one = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        /**
         * Keeps the 1st capture group and upper-cases the first char of the 2nd one.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_two = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        $str = \trim($str);

        // an input without any lower-case char (e.g. all caps) is lower-cased first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // URLs, domains, emails and file paths are preserved
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // small words are lower-cased
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // words without internal caps are capitalized
                    $word = static::ucfirst($matches[4], $encoding);
                } else {
                    // every other kind of word is preserved (iPhone)
                    $word = $matches[5];
                }

                // leading and trailing underscores are put back around the word
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $ucfirst_group_one,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $ucfirst_group_one,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind in this pattern tests for "…" although its inline
        // comment talks about a hyphen - confirm which one is intended.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        return $str;
    }
8642
8643
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one big binary number, i.e. leading
     * zero bits are not part of the result.
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The binary digits ('0' for an empty string); "false" stays part of
     *                      the documented type for backward compatibility, but is not produced
     *                      by this implementation.</p>
     */
    public static function str_to_binary(string $str)
    {
        // convert byte by byte: a single base_convert() call over the whole hex dump
        // loses precision as soon as the value no longer fits into a float / integer
        $bits = '';
        $byte_count = \strlen($str);
        for ($i = 0; $i < $byte_count; ++$i) {
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // same format as before: no leading zeros, but at least one digit
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8666
8667
    /**
     * Splits a string into its lines.
     *
     * The separator is a run of one or two "\r" / "\n" chars, so two directly
     * consecutive line-break chars are consumed as a single separator.
     *
     * @param string   $str                 <p>The input string.</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        // a failed split is answered like an empty input
        if ($lines === false) {
            return $remove_empty_values ? [] : [''];
        }

        // only run the filter if at least one of the two filter options is active
        if ($remove_empty_values || $remove_short_values !== null) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }
8706
8707
    /**
     * Splits a string into words and the pieces in between them.
     *
     * The words are used as capturing delimiters, so the result alternates between
     * "non-word" pieces and words.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        $char_list = self::rxClass($char_list, '\pL');

        $pieces = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            return $remove_empty_values ? [] : [''];
        }

        // without any filter option the raw split result is the answer
        if (!$remove_empty_values && $remove_short_values === null) {
            return $pieces;
        }

        $filtered = self::reduce_string_array(
            $pieces,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every remaining entry really is a string
        foreach ($filtered as $key => $piece) {
            $filtered[$key] = (string) $piece;
        }

        return $filtered;
    }
8758
8759
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If $substring alone is already as long as (or longer than) the desired
     * length, nothing of $str is kept and only $substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // short enough already: nothing to truncate, nothing to append
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // reserve room for the substring, but never go below zero: a negative
                // length would make mb_substr() cut from the end of the string and
                // the result would be longer than requested
                $length = \max(0, $length - (int) \mb_strlen($substring));

                /** @noinspection UnnecessaryCastingInspection */
                return (string) \mb_substr($str, 0, $length) . $substring;
            }

            return (string) \mb_substr($str, 0, $length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // same clamping as in the UTF-8 branch above
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
8818
8819
    /**
     * Truncates the string to a given length without splitting words. If
     * $substring is provided, and truncating occurs, the string is cut further
     * so that the substring may be appended without exceeding the desired length.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default: ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        // The literal 'UTF-8' selects the plain "mb_*" fast path; every other
        // value is normalized first and handled by the class helpers.
        $use_mb = $encoding === 'UTF-8';
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $use_mb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($length >= $str_length) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= $use_mb
            ? (int) \mb_strlen($substring)
            : (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $head = $use_mb
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // a space directly behind the cut means that no word was split
        $next_space = $use_mb
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);
        if ($next_space === $length) {
            return $head . $substring;
        }

        // otherwise go back to the last space inside the kept part
        $last_space = $use_mb
            ? \mb_strrpos($head, ' ', 0)
            : self::strrpos($head, ' ', 0, $encoding);

        $cut_back = $last_space !== false
                    ||
                    ($next_space !== false && !$ignore_do_not_split_words_for_one_word);

        if ($cut_back) {
            $head = $use_mb
                ? (string) \mb_substr($head, 0, (int) $last_space)
                : (string) self::substr($head, 0, (int) $last_space, $encoding);
        }

        return $head . $substring;
    }
8925
8926
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: thin wrapper around UTF8::str_delimit() with "_" as delimiter
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8943
8944
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: the string is passed through UTF8::str_camelize() and the result
     *       through UTF8::ucfirst()
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8971
8972
    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string, or the words themselves for format 1 / 2.</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // The loops below read the words from the odd indexes of this list and
        // treat the even indexes as the text in between.
        $parts = self::str_to_words($str, $char_list);
        $total = \count($parts);

        if ($format === 1) {
            $words = [];
            for ($idx = 1; $idx < $total; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        if ($format === 2) {
            $words = [];
            // the first word starts behind the leading non-word part
            $position = (int) self::strlen($parts[0]);
            for ($idx = 1; $idx < $total; $idx += 2) {
                $words[$position] = $parts[$idx];
                $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
            }

            return $words;
        }

        return (int) (($total - 1) / 2);
    }
9029
9030
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(); both strings are
     *       case-folded via UTF8::strtocasefold() before they are compared
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_first, $folded_second);
    }
9072
9073
    /**
     * Case-sensitive string comparison.
     *
     * INFO: both strings are normalized to NFD before the byte-wise comparison;
     *       a string that cannot be normalized is compared as given
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" returns false on failure (e.g. invalid UTF-8),
        // which must not be handed to "\strcmp()": fall back to the raw input.
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
9101
9102
    /**
     * Find length of initial segment not matching mask.
     *
     * @param string   $str
     * @param string   $char_list <p>The chars that end the initial segment.</p>
     * @param int      $offset    <p>Start of the part of the string that is examined.</p>
     * @param int|null $length    <p>Length of the part of the string that is examined.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask the whole string is the "not matching" segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        $needs_slice = $offset || $length !== null;
        if ($needs_slice) {
            if ($encoding !== 'UTF-8') {
                $slice = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $slice = \mb_substr($str, $offset);
            } else {
                $slice = \mb_substr($str, $offset, $length);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the mask
        $pattern = '/^(.*?)' . self::rxClass($char_list) . '/us';
        $found = [];
        if (!\preg_match($pattern, $str, $found)) {
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
9164
9165
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string, or an empty string if the conversion did not yield a string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        if (!\is_array($intOrHex)) {
            $intOrHex = [$intOrHex];
        }

        // build one numeric HTML entity per code point (each value is cast to int)
        $str = '';
        foreach ($intOrHex as $strPart) {
            $str .= '&#' . (int) $strPart . ';';
        }

        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        //
        // NOTE(review): the "HTML-ENTITIES" pseudo-encoding of mbstring is
        // reported as deprecated in newer PHP versions - confirm before upgrading.
        $decoded = \mb_convert_encoding($str, 'UTF-8', 'HTML-ENTITIES');

        // "mb_convert_encoding()" is not guaranteed to return a string,
        // but this method is type-hinted to do so.
        return \is_string($decoded) ? $decoded : '';
    }
9200
9201
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function string_has_bom(string $str): bool
    {
        // Iterate by value: nothing is written here, and a by-reference loop
        // would needlessly turn the entries of the shared static list into references.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // compare only the leading bytes that a BOM of this kind occupies
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9226
9227
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        $input = $clean_utf8 ? self::clean($str) : $str;

        // only hand over the tag list if the caller provided one
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9271
9272
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str !== '') {
            // drop every run of chars from the "[:space:]" class
            return (string) \preg_replace('/[[:space:]]+/u', '', $str);
        }

        return '';
    }
9293
9294
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // an empty needle in an empty haystack only counts as found from PHP 8 on
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        $grapheme_usable = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
                           &&
                           $offset >= 0 // grapheme_stripos() can't handle negative offset
                           &&
                           self::$SUPPORT['intl'] === true;
        if ($grapheme_usable) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitive
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9383
9384
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // an empty needle in an empty haystack only counts as found from PHP 8 on
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stristr($haystack, $needle, $before_needle, $encoding);
            }

            return \mb_stristr($haystack, $needle, $before_needle);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $is_utf8 = $encoding === 'UTF-8';

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_stristr()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture the part in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $captured);

        if (!isset($captured[1])) {
            return false;
        }

        $before = $captured[1];
        if ($before_needle) {
            return $before;
        }

        return self::substr($haystack, (int) self::strlen($before, $encoding), null, $encoding);
    }
9489
9490
    /**
     * Get the string length, not the byte-length!
     *
     * INFO: use UTF8::strwidth() for the char-length
     *
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        if (
            !$is_utf8
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: one regex match per char
        //

        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        // no match at all for a non-empty string: the length is not available
        return $char_count === 0 ? false : $char_count;
    }
9620
9621
    /**
     * Get string length in byte.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of bytes in the string.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strlen($str);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strlen($str, 'CP850'); // 8-BIT
    }
9643
9644
    /**
     * Case-insensitive string comparison using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * EXAMPLES: <code>
     * UTF8::strnatcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // case-fold both operands first, then delegate to the case-sensitive natural comparison
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_str1, $folded_str2);
    }
9675
9676
    /**
     * String comparison using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * EXAMPLES: <code>
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            // fold both operands via strtonatfold() before the native natural-order comparison
            $folded_str1 = (string) self::strtonatfold($str1);
            $folded_str2 = (string) self::strtonatfold($str2);

            return \strnatcmp($folded_str1, $folded_str2);
        }

        // byte-identical strings are equal, no folding needed
        return 0;
    }
9712
9713
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * EXAMPLE: <code>
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // case-fold both operands first, then delegate to the case-sensitive variant
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded_str1, $folded_str2, $len);
    }
9746
9747
    /**
     * String comparison of the first n characters.
     *
     * EXAMPLE: <code>
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // other encodings: cut both strings via the encoding-aware substr() helper
        if ($encoding !== 'UTF-8') {
            return self::strcmp(
                (string) self::substr($str1, 0, $len, $encoding),
                (string) self::substr($str2, 0, $len, $encoding)
            );
        }

        // UTF-8: cut both strings directly via "mb_substr()"
        return self::strcmp(
            (string) \mb_substr($str1, 0, $len),
            (string) \mb_substr($str2, 0, $len)
        );
    }
9788
9789
    /**
     * Search a string for any of a set of characters.
     *
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The string starting from the character found, or false if it is not found.</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // build the pattern from the character list via the rxClass() helper
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $match)) {
            return false;
        }

        // cut the haystack at the byte offset of the matched text
        $byte_offset = (int) \strpos($haystack, $match[0]);

        return \substr($haystack, $byte_offset);
    }
9816
9817
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * INFO: use UTF8::strpos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.<br>
     *                               A negative offset counts from the end of the string.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // early exit for an empty haystack, before the needle is touched at all
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000) {
                if ($needle === '') {
                    return 0;
                }
            } else {
                return false;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // re-check with the normalized (string) needle
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack: translate it into
        // the equivalent absolute character offset *before* slicing, so that the
        // position returned below refers to the original haystack and not to the
        // sliced remainder.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position of the needle inside the sliced haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position and re-add the skipped offset
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
9990
9991
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.<br>
     *                   An empty haystack or an empty needle always results in false.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // If mbstring function overloading is active, "mb_" is available:
        // search with a single-byte (8-bit) encoding.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
10023
10024
    /**
     * Find the byte position of the first occurrence of a substring in a string, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.<br>
     *                   An empty haystack or an empty needle always results in false.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // If mbstring function overloading is active, "mb_" is available:
        // search with a single-byte (8-bit) encoding.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850')
            : \stripos($haystack, $needle, $offset);
    }
10056
10057
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or false if needle is not found.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_single_byte_encoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte_encoding && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both remaining fallbacks only search for the first character of the needle
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        // the two fallbacks differ only in how the last position is located
        $last_pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $first_char, $encoding)
            : self::strrpos($haystack, $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10187
10188
    /**
     * Reverses characters order in the string.
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // emojis are encoded before reversing and decoded again on the result
        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($i = (int) self::strlen($str, $encoding) - 1; $i >= 0; --$i) {
                $char = self::substr($str, $i, 1, $encoding);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($i = (int) \grapheme_strlen($str) - 1; $i >= 0; --$i) {
                $grapheme = \grapheme_substr($str, $i, 1);
                if ($grapheme !== false) {
                    $reversed .= $grapheme;
                }
            }
        } else {
            for ($i = (int) \mb_strlen($str) - 1; $i >= 0; --$i) {
                $char = \mb_substr($str, $i, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
10245
10246
    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the fallback only searches for the first character of the needle
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10326
10327
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.<br>Or a (non-negative) code point as int.</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // early exit for an empty haystack, before the needle is touched at all
        if ($haystack === '') {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // re-check with the normalized (string) needle
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strripos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both sides and delegate to the case-sensitive search
        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $clean_utf8
        );
    }
10458
10459
    /**
     * Byte-wise, case-insensitive search for the last occurrence of a string within another.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The string to search for.</p>
     * @param int    $offset   [optional] <p>The position in haystack to start searching.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if one of
     *                   the two strings is empty.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // no function overloading active: call the native function directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset
        return \mb_strripos($haystack, $needle, $offset, 'CP850');
    }
10493
10494
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.<br>With PHP >= 8.0 an empty needle in an
     *                   empty haystack is reported at position 0; with older versions an empty haystack or an empty
     *                   needle always returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // before PHP 8 nothing can be found in an empty haystack
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // only reachable with PHP >= 8: the empty needle "matches" at position 0
            return $needle === '' ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset > 0) {
            // search only behind the offset, the skipped characters are added to the result again
            $haystack = (string) self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            // Native strrpos() semantics: with a negative offset a match may still *start* at the
            // cut-off position, so the search window has to be extended by the length of the needle.
            // Cutting exactly at $offset would miss e.g. strrpos('abc', 'c', -1) === 2.
            $window_end = $offset + (int) self::strlen($needle);
            if ($window_end < 0) {
                $haystack = (string) self::substr($haystack, 0, $window_end);
            }

            $offset = 0;
        }

        // byte position of the last match ...
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        /** @var false|string $str_tmp - needed for PhpStan (stubs error) */
        $str_tmp = \substr($haystack, 0, $pos);
        if ($str_tmp === false) {
            return false;
        }

        // ... converted into a character position by measuring everything in front of the match
        return $offset + (int) self::strlen($str_tmp);
    }
10653
10654
    /**
     * Byte-wise search for the position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>The string being checked, for the last occurrence of needle.</p>
     * @param string $needle   <p>The string to find in haystack.</p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, or if one of the two
     *                   strings is empty, it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // no function overloading active: call the native function directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset
        return \mb_strrpos($haystack, $needle, $offset, 'CP850');
    }
10688
10689
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start of the examined part of the string.</p>
     * @param int|null $length   [optional] <p>Length of the examined part of the string.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The length of the matching initial segment, 0 if the (sliced) string or the mask
     *                   is empty or if nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // cut out the requested part of the string first
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest run of mask characters anchored at the start of the string
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
10736
10737
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.<br>
     *                      An empty needle returns false before PHP 8.0; with PHP >= 8.0 it returns the
     *                      haystack.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // only PHP >= 8 "finds" an empty needle in an empty haystack
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // invalid characters in front of the needle would lead to wrong positions
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($result !== false) {
                return $result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $parts);

        if (!isset($parts[1])) {
            return false;
        }

        if (!$before_needle) {
            // skip the prefix, keep the needle and everything behind it
            return self::substr($haystack, (int) self::strlen($parts[1]));
        }

        return $parts[1];
    }
10870
10871
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>The string from which to get the first occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack, or false if needle is not found
     *                      or if one of the two strings is empty.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // nothing to search in or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // no function overloading active: call the native function directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
    }
10912
10913
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // replace the special case-folding characters first
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // plain UTF-8 without language specific rules can go directly to "mb_*"
        $use_mb_directly = $lang === null && $encoding === 'UTF-8';

        if ($lower) {
            return $use_mb_directly
                ? \mb_strtolower($str)
                : self::strtolower($str, $encoding, false, $lang);
        }

        return $use_mb_directly
            ? \mb_strtoupper($str)
            : self::strtoupper($str, $encoding, false, $lang);
    }
10969
10970
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // language specific lowercasing via the intl transliterator
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11049
11050
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown,
                    // so report it the same way as UTF8::strtolower() does
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11129
11130
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from, or (if "to" is left empty) an array of
     *                              "search => replace" pairs.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to. If "from" and "to" differ in
     *                              length, the extra entries of the longer one are ignored.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if "from" and "to" can not be combined into replacement pairs
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // replacing something with itself is a no-op
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // split plain strings into single characters, so that "from" and "to" can be paired up
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            $count_from = \count($from);
            $count_to = \count($to);

            // ignore the entries without a counterpart on the other side
            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep $from untouched until the result is checked, so that the error message can show the input
            $replace_pairs = \array_combine($from, $to);
            if ($replace_pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $replace_pairs;
        }

        // only reachable with an empty "to": a string "from" is replaced by it
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        return \strtr($str, $from);
    }
11196
11197
    /**
11198
     * Return the width of a string.
11199
     *
11200
     * INFO: use UTF8::strlen() for the byte-length
11201
     *
11202
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn")); // 21</code>
11203
     *
11204
     * @param string $str        <p>The input string.</p>
11205
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11206
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11207
     *
11208
     * @psalm-pure
11209
     *
11210
     * @return int
11211
     */
11212 2
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        // nothing to measure
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are used as-is, every other name is normalized first
        $skip_normalization = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$skip_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding,
            // so strip invalid sequences before measuring
            $str = self::clean($str);
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip every character matched by the wide-character ranges and remember how many were removed
        $wide_count = 0;
        $narrow_only = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // removed (wide) characters count twice, the remaining ones once
        return ($wide_count * 2) + (int) self::strlen($narrow_only);
    }
11256
11257
    /**
11258
     * Get part of a string.
11259
     *
11260
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
11261
     *
11262
     * @see http://php.net/manual/en/function.mb-substr.php
11263
     *
11264
     * @param string   $str        <p>The string being checked.</p>
11265
     * @param int      $offset     <p>The first position used in str.</p>
11266
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
11267
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11268
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11269
     *
11270
     * @psalm-pure
11271
     *
11272
     * @return false|string
11273
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
11274
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
11275
     *                      characters long, <b>FALSE</b> will be returned.
11276
     */
11277 172
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty input or an explicit zero length always yields an empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // no offset and no length: the caller asked for the whole string
        if ($length === null && !$offset) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // mbstring (only used for UTF-8 here)
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['mbstring'] === true) {
            return $length === null
                ? \mb_substr($str, $offset)
                : \mb_substr($str, $offset, $length);
        }

        //
        // binary || ascii only: plain byte-wise substr()
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return $length === null
                ? \substr($str, $offset)
                : \substr($str, $offset, $length);
        }

        // the remaining fallbacks need the string length
        // (it is only computed when an offset is given or no length was passed)
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // offset points exactly at the end and no length was given
        // ($length can not be 0 at this point, so "no length" means null)
        if ($length === null && $offset === $str_length) {
            return '';
        }

        // offset lies behind the end of the string
        if ($offset && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            && $offset >= 0 // grapheme_substr() can't handle negative offset
            && self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $intl_result = \grapheme_substr($str, $offset, $length);
            if ($intl_result !== false) {
                return $intl_result;
            }
        }

        //
        // fallback via iconv
        //

        $iconv_usable = $length >= 0 // "iconv_substr()" can't handle negative length
            && self::$SUPPORT['iconv'] === true;
        if ($iconv_usable) {
            $iconv_result = \iconv_substr($str, $offset, $length);
            if ($iconv_result !== false) {
                return $iconv_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split into single characters, take the relevant slice and join it to a string again
        $chars = self::str_split($str);

        return \implode('', \array_slice($chars, $offset, $length));
    }
11415
11416
    /**
11417
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
11418
     *
11419
     * EXAMPLE: <code>
11420
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
11421
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
11422
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
11423
     * </code>
11424
     *
11425
     * @param string   $str1               <p>The main string being compared.</p>
11426
     * @param string   $str2               <p>The secondary string being compared.</p>
11427
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
11428
     *                                     counting from the end of the string.</p>
11429
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
11430
     *                                     of the length of the str compared to the length of main_str less the
11431
     *                                     offset.</p>
11432
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
11433
     *                                     insensitive.</p>
11434
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
11435
     *
11436
     * @psalm-pure
11437
     *
11438
     * @return int
11439
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
11440
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
11441
     *             <strong>0</strong> if they are equal
11442
     */
11443 2
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // the inputs are only cut when an offset or a length was requested
        $needs_slicing = $offset !== 0 || $length !== null;

        if ($needs_slicing && $encoding === 'UTF-8') {
            // UTF-8: slice directly via mbstring
            if ($length === null) {
                $main_part = \mb_substr($str1, $offset);
            } else {
                $main_part = \mb_substr($str1, $offset, $length);
            }
            $str1 = (string) $main_part;

            // compare against at most as many characters as the slice of $str1 has
            $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
        } elseif ($needs_slicing) {
            // other encodings: normalize the name and delegate to self::substr()
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str1 = (string) self::substr($str1, $offset, $length, $encoding);
            $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1), $encoding);
        }

        return $case_insensitivity
            ? self::strcasecmp($str1, $str2, $encoding)
            : self::strcmp($str1, $str2);
    }
11477
11478
    /**
11479
     * Count the number of substring occurrences.
11480
     *
11481
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
11482
     *
11483
     * @see http://php.net/manual/en/function.substr-count.php
11484
     *
11485
     * @param string   $haystack   <p>The string to search in.</p>
11486
     * @param string   $needle     <p>The substring to search for.</p>
11487
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
11488
     * @param int|null $length     [optional] <p>
11489
     *                             The maximum length after the specified offset to search for the
11490
     *                             substring. It outputs a warning if the offset plus the length is
11491
     *                             greater than the haystack length.
11492
     *                             </p>
11493
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11494
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11495
     *
11496
     * @psalm-pure
11497
     *
11498
     * @return false|int
11499
     *                   <p>This function returns an integer or false if there isn't a string.</p>
11500
     */
11501 5
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // an empty needle can not be counted
        if ($needle === '') {
            return false;
        }

        // nothing to search in, or an explicit zero-length window: no occurrences
        // (the former PHP-version check was removed, both of its arms returned 0)
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // restrict the haystack to the requested window
        // (a negative $length without an $offset does not trigger the cut)
        if ($offset || ($length !== null && $length > 0)) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // vanilla fallback: count the matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
11575
11576
    /**
11577
     * Count the number of substring occurrences.
11578
     *
11579
     * @param string   $haystack <p>
11580
     *                           The string being checked.
11581
     *                           </p>
11582
     * @param string   $needle   <p>
11583
     *                           The string being found.
11584
     *                           </p>
11585
     * @param int      $offset   [optional] <p>
11586
     *                           The offset where to start counting
11587
     *                           </p>
11588
     * @param int|null $length   [optional] <p>
11589
     *                           The maximum length after the specified offset to search for the
11590
     *                           substring. It outputs a warning if the offset plus the length is
11591
     *                           greater than the haystack length.
11592
     *                           </p>
11593
     *
11594
     * @psalm-pure
11595
     *
11596
     * @return false|int
11597
     *                   <p>The number of times the
11598
     *                   needle substring occurs in the
11599
     *                   haystack string.</p>
11600
     */
11601 4
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        // nothing to search in / nothing to search for
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // with "mbstring.func_overload" the window is cut manually,
        // because the overloaded functions are used below
        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                // output from "substr_count()" have changed in PHP 7.1
                // (PHP 7.1.0 has the version id 70100, the former value 71000 matched every PHP 7.x release)
                \PHP_VERSION_ID < 70100
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        // no overload: the native byte-wise function handles offset and length itself
        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
11657
11658
    /**
11659
     * Returns the number of occurrences of $substring in the given string.
11660
     * By default, the comparison is case-sensitive, but can be made insensitive
11661
     * by setting $case_sensitive to false.
11662
     *
11663
     * @param string $str            <p>The input string.</p>
11664
     * @param string $substring      <p>The substring to search for.</p>
11665
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
11666
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
11667
     *
11668
     * @psalm-pure
11669
     *
11670
     * @return int
11671
     */
11672 15
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        // nothing to search in / nothing to search for
        if ($str === '' || $substring === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                // case-insensitive: fold both sides before counting
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        // UTF-8: case-insensitive comparison is done on the upper-cased strings
        if (!$case_sensitive) {
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
11705
11706
    /**
11707
     * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
11708
     *
11709
     * EXAMPLE: <code>
11710
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
11711
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
11712
     * </code>
11713
     *
11714
     * @param string $haystack <p>The string to search in.</p>
11715
     * @param string $needle   <p>The substring to search for.</p>
11716
     *
11717
     * @psalm-pure
11718
     *
11719
     * @return string
11720
     *                <p>Return the sub-string.</p>
11721
     */
11722 2
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // no (case-insensitive) prefix match: return the input unchanged
        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many leading characters as the needle has
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
11738
11739
    /**
11740
     * Get part of a string process in bytes.
11741
     *
11742
     * @param string   $str    <p>The string being checked.</p>
11743
     * @param int      $offset <p>The first position used in str.</p>
11744
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
11745
     *
11746
     * @psalm-pure
11747
     *
11748
     * @return false|string
11749
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
11750
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
11751
     *                      characters long, <b>FALSE</b> will be returned.
11752
     */
11753 1
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty input or an explicit zero length
        if ($str === '' || $length === 0) {
            return '';
        }

        // no offset and no length: whole string
        if ($length === null && !$offset) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // a missing length is replaced by a large upper bound
            return \substr($str, $offset, $length ?? 2147483647);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }
11772
11773
    /**
11774
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
11775
     *
11776
     * EXAMPLE: <code>
11777
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
11778
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
11779
     * </code>
11780
     *
11781
     * @param string $haystack <p>The string to search in.</p>
11782
     * @param string $needle   <p>The substring to search for.</p>
11783
     *
11784
     * @psalm-pure
11785
     *
11786
     * @return string
11787
     *                <p>Return the sub-string.</p>
11788
     */
11789 2
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // no (case-insensitive) suffix match: return the input unchanged
        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
11805
11806
    /**
11807
     * Removes a prefix ($needle) from the beginning of the string ($haystack).
11808
     *
11809
     * EXAMPLE: <code>
11810
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
11811
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
11812
     * </code>
11813
     *
11814
     * @param string $haystack <p>The string to search in.</p>
11815
     * @param string $needle   <p>The substring to search for.</p>
11816
     *
11817
     * @psalm-pure
11818
     *
11819
     * @return string
11820
     *                <p>Return the sub-string.</p>
11821
     */
11822 2
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // no (case-sensitive) prefix match: return the input unchanged
        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many leading characters as the needle has
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
11838
11839
    /**
11840
     * Replace text within a portion of a string.
11841
     *
11842
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
11843
     *
11844
     * source: https://gist.github.com/stemar/8287074
11845
     *
11846
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
11847
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
11848
     * @param int|int[]       $offset      <p>
11849
     *                                     If start is positive, the replacing will begin at the start'th offset
11850
     *                                     into string.
11851
     *                                     <br><br>
11852
     *                                     If start is negative, the replacing will begin at the start'th character
11853
     *                                     from the end of string.
11854
     *                                     </p>
11855
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
11856
     *                                     portion of string which is to be replaced. If it is negative, it
11857
     *                                     represents the number of characters from the end of string at which to
11858
     *                                     stop replacing. If it is not given, then it will default to strlen(
11859
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
11860
     *                                     length is zero then this function will have the effect of inserting
11861
     *                                     replacement into string at the given start offset.</p>
11862
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
11863
     *
11864
     * @psalm-pure
11865
     *
11866
     * @return string|string[]
11867
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
11868
     *
11869
     * @template TSubstrReplace
11870
     * @phpstan-param TSubstrReplace $str
11871
     * @phpstan-return TSubstrReplace
11872
     */
11873 10
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        //
        // array input: build one replacement / offset / length per element and recurse
        //

        if (\is_array($str)) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                // array_fill() returns [] for an empty input, array_pad([$x], 0, $x) would
                // return one element and produce a bogus result entry
                $replacement = \array_fill(0, $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer offsets fall back to 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_fill(0, $num, $offset);
            }

            // the length
            if ($length === null) {
                // without a length the replacement is inserted (length 0) for array input
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // non-integer lengths fall back to the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_fill(0, $num, $length);
            }

            // recursive call, the closure forwards $encoding to every element
            // (it was silently dropped before, so array input was always handled as UTF-8)
            return \array_map(
                static function ($str_item, $replacement_item, $offset_item, $length_item) use ($encoding) {
                    return self::substr_replace($str_item, $replacement_item, $offset_item, $length_item, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        //
        // scalar input
        //

        // an array of replacements for a single string: only the first entry is used
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, a missing / too large one means "to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
11997
11998
    /**
     * Strips $needle from the end of $haystack, if (and only if) $haystack ends with it.
     *
     * The suffix check is a case-sensitive, byte-wise comparison.
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>$haystack without the trailing $needle, or $haystack unchanged if it does not end with
     *                $needle (or if $needle is empty).</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to cut from, or nothing to cut off
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise check: does the haystack end with the needle?
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            // fast path: call the "mb_" functions directly
            $chars_to_keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $chars_to_keep);
        }

        $chars_to_keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $chars_to_keep, $encoding);
    }
12047
12048
    /**
     * Swaps the case of every character: lowercase becomes uppercase and vice versa.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lowered = \mb_strtolower($str);
            $uppered = \mb_strtoupper($str);
        } else {
            $lowered = self::strtolower($str, $encoding);
            $uppered = self::strtoupper($str, $encoding);
        }

        // byte-wise XOR: lower ^ upper is non-zero only where the two case variants differ,
        // and XOR-ing that into the input flips exactly those bytes to the opposite case
        return (string) ($lowered ^ $uppered ^ $str);
    }
12080
12081
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when the "mbstring" / "iconv" extension is not loaded
     * but one of its functions ("mb_strlen" / "iconv") exists anyway.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12108
12109
    /**
     * Replaces every tab character in the string with $tab_length spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with all tabs expanded to spaces.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12129
12130
    /**
     * Title-cases the string: the first character of each word becomes uppercase,
     * all other characters become lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // drop invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // language-specific rules or length-preservation are delegated to "str_titleize()"
        $needs_special_handling = $lang !== null || $try_to_keep_the_string_length;
        if ($needs_special_handling) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
12184
12185
    /**
     * Transliterates a string into ASCII (delegates to "ASCII::to_transliterate()").
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The transliterated string.</p>
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
12206
12207
    /**
     * Converts a scalar value into a boolean.
     *
     * Rules, applied to the trimmed and lower-cased string representation of the input:
     * - "true", "1", "on", "yes" => true
     * - "false", "0", "off", "no" => false
     * - numeric strings => true if the number is greater than zero, false otherwise
     * - empty or whitespace-only strings => false
     * - everything else => true
     *
     * Surrounding whitespace is ignored, so e.g. " false " is false and " yes " is true.
     *
     * @param bool|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return false;
        }

        // normalize first, so that padded or upper-cased keywords (" False ", "NO\n")
        // are recognized instead of being treated as "some non-empty string" (=> true)
        $normalized = \strtolower(\trim($str));
        if ($normalized === '') {
            // whitespace-only input
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        if (isset($map[$normalized])) {
            return $map[$normalized];
        }

        if (\is_numeric($normalized)) {
            return ((float) $normalized) > 0;
        }

        // any other non-empty value counts as true
        return (bool) $normalized;
    }
12250
12251
    /**
     * Converts the given string into a safe filename, keeping the string case
     * (delegates to "ASCII::to_filename()").
     *
     * @param string $str               <p>The input string.</p>
     * @param bool   $use_transliterate <p>No transliteration, conversion etc. is done by default - unsafe characters
     *                                  are simply replaced with hyphen.</p>
     * @param string $fallback_char     <p>Passed through to "ASCII::to_filename()".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The filename-safe string.</p>
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
12274
12275
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively); the array keys are preserved.
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @template TToIso8859
     * @phpstan-param TToIso8859 $str
     * @phpstan-return TToIso8859
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
12307
12308
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * Accepts a single string or an array of strings; every value is passed through
     * "UTF8::to_utf8_string()" and array keys are preserved.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @phpstan-param TToUtf8 $str
     * @phpstan-return TToUtf8
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $converted */
            $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $converted;
        }

        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        /** @phpstan-var TToUtf8 $str */
        return $str;
    }
12348
12349
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string</p>
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $result = '';

        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                if (($lead & "\xC0") === "\x80") {
                    // stray continuation byte (10xxxxxx) - needs conversion
                    $result .= self::to_utf8_convert_helper($lead);
                } else {
                    // plain 7-bit byte - it doesn't need conversion
                    $result .= $lead;
                }

                continue;
            }

            // how many continuation bytes would a valid UTF-8 sequence with this lead byte have?
            if ($lead <= "\xDF") {
                $expected_trail_bytes = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $expected_trail_bytes = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $expected_trail_bytes = 3; // looks like 4 bytes UTF8
            } else {
                $expected_trail_bytes = 0; // doesn't look like UTF8, but should be converted
            }

            $sequence = $lead;
            $looks_like_utf8 = $expected_trail_bytes > 0;
            for ($k = 1; $looks_like_utf8 && $k <= $expected_trail_bytes; ++$k) {
                // bytes beyond the end of the string count as "\x00" (=> not a continuation byte)
                $trail = $pos + $k >= $byte_count ? "\x00" : $str[$pos + $k];

                if ($trail < "\x80" || $trail > "\xBF") {
                    $looks_like_utf8 = false;
                } else {
                    $sequence .= $trail;
                }
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already - copy the whole sequence and skip its trail bytes
                $result .= $sequence;
                $pos += $expected_trail_bytes;
            } else {
                // not valid UTF8 - convert only the lead byte, the following bytes are re-examined
                $result .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair, see: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high_surrogate = (int) \hexdec($matches[1]);
                    $low_surrogate = (int) \hexdec($matches[2]);
                    $code_point = ($high_surrogate << 10)
                                  + $low_surrogate
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    $first_byte = (string) self::chr(0xC0 | ($code_point >> 6));
                    $second_byte = (string) self::chr(0x80 | ($code_point & 0x3F));

                    return $first_byte . $second_byte;
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        if ($result === null) {
            // the regex engine reported an error
            return '';
        }

        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
12480
12481
    /**
     * Casts a numeric string to an integer; non-numeric strings yield null.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>null if the string isn't numeric</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
12499
12500
    /**
     * Returns the given input as string, or null if the input is neither int|float|string
     * nor an object that implements the "__toString()" method.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        if ($input === null) {
            return null;
        }

        $is_stringable_scalar = \is_string($input) || \is_int($input) || \is_float($input);
        if ($is_stringable_scalar) {
            return (string) $input;
        }

        $is_stringable_object = \is_object($input) && \method_exists($input, '__toString');
        if ($is_stringable_object) {
            return (string) $input;
        }

        // bool, array, resource, object without "__toString()", ...
        return null;
    }
12539
12540
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; whitespace is stripped if
     *                           null.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$chars}" instead of "${chars}": the latter is deprecated since PHP 8.2
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: the pattern is handed to "regex_replace()"
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            // "{$chars}" instead of "${chars}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
12585
12586
    /**
     * Makes string's first char uppercase.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with the first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before splitting the string
            $str = self::clean($str);
        }

        // the native "mb_" functions are only used if no special-case handling was requested
        $native_uppercase = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $tail = (string) \mb_substr($str, 1);
            $first_char = (string) \mb_substr($str, 0, 1);

            $head = $native_uppercase
                ? \mb_strtoupper($first_char)
                : self::strtoupper($first_char, $encoding, false, $lang, $try_to_keep_the_string_length);

            return $head . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($native_uppercase) {
            $first_char = (string) \mb_substr($str, 0, 1, $encoding);
            $head = \mb_strtoupper($first_char, $encoding);
        } else {
            $first_char = (string) self::substr($str, 0, 1, $encoding);
            $head = self::strtoupper($first_char, $encoding, false, $lang, $try_to_keep_the_string_length);
        }

        return $head . $tail;
    }
12662
12663
    /**
     * Uppercase for all words in the string.
     *
     * Only the first character of each word is changed; the remaining characters are left as they are
     * (unlike MB_CASE_TITLE, which also lowercases them).
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison on purpose: the string "0" is falsy in PHP, but it is valid input
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            // drop invalid byte sequences before splitting the string into words
            $str = self::clean($str);
        }

        // the native "ucwords()" is only usable without a custom char-list / exceptions
        // (compared against '' so that a char-list or exception of "0" is not ignored)
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty parts only - a word like "0" must be kept
            if ((string) $word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
12731
12732
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // one decoding pass is always done; with $multi_decode the passes are repeated
        // until the string does not change anymore
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entities_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );
            $str = \urldecode($entities_decoded);
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }
12791
12792
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences whose code point is below 256 become a single byte; every other
     * multi-byte sequence is replaced by "?".
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>If true, the unmodified input is returned whenever the decoded string
     *                                is not shorter (measured with "UTF8::strlen()") than the input.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // keep the untouched input for the comparison at the end
        $original = $str;
        $byte_count = \strlen($str);

        // lazy-load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $placeholder = '?';

        // the string is rewritten in place: $read walks over the input bytes, $write (always <= $read)
        // marks where the next decoded byte goes
        $write = 0;
        for ($read = 0; $read < $byte_count; ++$read, ++$write) {
            $high_nibble = $str[$read] & "\xF0";

            if ($high_nibble === "\xC0" || $high_nibble === "\xD0") {
                // lead byte of a 2-byte sequence: combine 5 + 6 payload bits
                $upper_bits = self::$ORD[$str[$read] & "\x1F"] << 6;
                ++$read;
                $code_point = $upper_bits | self::$ORD[$str[$read] & "\x3F"];

                $str[$write] = $code_point < 256 ? self::$CHR[$code_point] : $placeholder;
            } elseif ($high_nibble === "\xE0") {
                // lead byte of a 3-byte sequence: replace it and skip the two trail bytes
                $str[$write] = $placeholder;
                $read += 2;
            } elseif ($high_nibble === "\xF0") {
                // lead byte of a 4-byte sequence: replace it and skip the three trail bytes
                $str[$write] = $placeholder;
                $read += 3;
            } else {
                // any other byte is copied as it is
                $str[$write] = $str[$read];
            }
        }

        // only the first $write bytes hold decoded output
        /** @var false|string $decoded - needed for PhpStan (stubs error) */
        $decoded = \substr($str, 0, $write);
        if ($decoded === false) {
            $decoded = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($decoded) >= (int) self::strlen($original)
        ) {
            return $original;
        }

        return $decoded;
    }
12864
12865
    /**
12866
     * Encodes an ISO-8859-1 string to UTF-8.
12867
     *
12868
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
12869
     *
12870
     * @param string $str <p>The input string.</p>
12871
     *
12872
     * @psalm-pure
12873
     *
12874
     * @return string
12875
     */
12876 16
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        // a failed conversion is reported as an empty string
        return $encoded === false ? '' : $encoded;
    }
12891
12892
    /**
12893
     * Returns an array with all utf8 whitespace characters.
12894
     *
12895
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
12896
     *
12897
     * @psalm-pure
12898
     *
12899
     * @return string[]
12900
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
12901
     *                  as defined in above URL
12902
     */
12903 2
    public static function whitespace_table(): array
    {
        // hand out the class-level whitespace lookup table unchanged
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12907
12908
    /**
12909
     * Limit the number of words in a string.
12910
     *
12911
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
12912
     *
12913
     * @param string $str        <p>The input string.</p>
12914
     * @param int    $limit      <p>The limit of words as integer.</p>
12915
     * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
12916
     *
12917
     * @psalm-pure
12918
     *
12919
     * @return string
12920
     */
12921 2
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // optional leading whitespace, then 1..$limit runs of non-whitespace,
        // each followed by its trailing whitespace
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        // no match at all: hand the input back untouched
        if (!isset($found[0])) {
            return $str;
        }

        // the match covers the whole string: nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($found[0])) {
            return $str;
        }

        // drop the whitespace after the last kept word, then append the add-on
        return \rtrim($found[0]) . $str_add_on;
    }
12942
12943
    /**
12944
     * Wraps a string to a given number of characters
12945
     *
12946
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
12947
     *
12948
     * @see http://php.net/manual/en/function.wordwrap.php
12949
     *
12950
     * @param string $str   <p>The input string.</p>
12951
     * @param int    $width [optional] <p>The column width.</p>
12952
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
12953
     * @param bool   $cut   [optional] <p>
12954
     *                      If the cut is set to true, the string is
12955
     *                      always wrapped at or before the specified width. So if you have
12956
     *                      a word that is larger than the given width, it is broken apart.
12957
     *                      </p>
12958
     *
12959
     * @psalm-pure
12960
     *
12961
     * @return string
12962
     *                <p>The given string wrapped at the specified column.</p>
12963
     */
12964 12
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        // Build two parallel views of the input:
        //  - $chars: the real characters (an existing $break counts as one entry)
        //  - $mask:  one single-byte stand-in per entry ('#' = break, ' ' = space, '?' = anything else)
        // so the native byte-based wordwrap() can decide where to wrap.
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach (\explode($break, $str) as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_pos = 0;   // index into $chars
        $mask_pos = -1;  // index into $mask
        $break_pos = -1; // position of the current '#' in $mask

        /** @noinspection PhpAssignmentInConditionInspection - is ok here */
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy every character that sits before the current break marker
            for (++$mask_pos; $mask_pos < $break_pos; ++$mask_pos) {
                if (isset($chars[$char_pos])) {
                    $wrapped .= $chars[$char_pos];
                    unset($chars[$char_pos]);
                }
                ++$char_pos;

                // prevent endless loop, e.g. if there is an error in the "mb_*" polyfill
                if ($mask_pos > $mask_length) {
                    break 2;
                }
            }

            // a break marker that stands for an original break or a space consumes that character
            if (
                $break === $chars[$char_pos]
                ||
                $chars[$char_pos] === ' '
            ) {
                unset($chars[$char_pos]);
                ++$char_pos;
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is an error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed above belongs to the last line
        return $wrapped . \implode('', $chars);
    }
13035
13036
    /**
13037
     * Line-wrap the string after $width characters, but split the string by "$delimiter" first ...
13038
     *    ... so that each line is wrapped on its own.
13039
     *
13040
     * @param string      $str             <p>The input string.</p>
13041
     * @param int         $width           [optional] <p>The column width.</p>
13042
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
13043
     * @param bool        $cut             [optional] <p>
13044
     *                                     If the cut is set to true, the string is
13045
     *                                     always wrapped at or before the specified width. So if you have
13046
     *                                     a word that is larger than the given width, it is broken apart.
13047
     *                                     </p>
13048
     * @param bool        $add_final_break [optional] <p>
13049
     *                                     If this flag is true, then the method will add a $break at the end
13050
     *                                     of the result string.
13051
     *                                     </p>
13052
     * @param string|null $delimiter       [optional] <p>
13053
     *                                     You can change the default behavior, where we split the string by newline.
13054
     *                                     </p>
13055
     *
13056
     * @psalm-pure
13057
     *
13058
     * @return string
13059
     */
13060 1
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // without an explicit delimiter the input is split on any newline flavour
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        // wrap every line on its own
        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        // lines are re-joined with the given delimiter, or "\n" when none was given
        $glue = $delimiter ?? "\n";
        $tail = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $tail;
    }
13089
13090
    /**
13091
     * Returns an array of Unicode White Space characters.
13092
     *
13093
     * @psalm-pure
13094
     *
13095
     * @return string[]
13096
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
13097
     */
13098 2
    public static function ws(): array
    {
        // hand out the class-level white space list unchanged
        $white_space = self::$WHITESPACE;

        return $white_space;
    }
13102
13103
    /**
13104
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
13105
     *
13106
     * EXAMPLE: <code>
13107
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
13108
     * //
13109
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
13110
     * </code>
13111
     *
13112
     * @see          http://hsivonen.iki.fi/php-utf8/
13113
     *
13114
     * @param string $str    <p>The string to be checked.</p>
13115
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
13116
     *
13117
     * @psalm-pure
13118
     *
13119
     * @return bool
13120
     *
13121
     * @noinspection ReturnTypeCanBeDeclaredInspection
13122
     */
13123 110
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary-looking input that is detected as UTF-16 or UTF-32 is rejected
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        // Fallback: byte-wise state machine.
        $pending = 0;         // continuation octets still expected for the current sequence
        $code_point = 0;      // code point assembled so far
        $sequence_length = 1; // total number of octets of the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $byte_count = \strlen($str);
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($pending === 0) {
                // Start of a character: either US-ASCII or the first octet of a multi-octet sequence.
                if ((0x80 & $octet) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;
                } elseif ((0xE0 & $octet) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $code_point = ($octet & 0x1F) << 6;
                    $pending = 1;
                    $sequence_length = 2;
                } elseif ((0xF0 & $octet) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $code_point = ($octet & 0x0F) << 12;
                    $pending = 2;
                    $sequence_length = 3;
                } elseif ((0xF8 & $octet) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $code_point = ($octet & 0x07) << 18;
                    $pending = 3;
                    $sequence_length = 4;
                } elseif ((0xFC & $octet) === 0xF8) {
                    // First octet of 5 octet sequence. Always illegal (not the shortest
                    // form or outside 0-0x10FFFF); instead of resynchronizing, the sequence
                    // is consumed and rejected by the check at its end.
                    $code_point = ($octet & 0x03) << 24;
                    $pending = 4;
                    $sequence_length = 5;
                } elseif ((0xFE & $octet) === 0xFC) {
                    // First octet of 6 octet sequence, handled like the 5 octet case.
                    $code_point = ($octet & 1) << 30;
                    $pending = 5;
                    $sequence_length = 6;
                } else {
                    // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            // Inside a sequence only a continuation octet (10xxxxxx) is legal.
            if ((0xC0 & $octet) !== 0x80) {
                // Incomplete multi-octet sequence.
                return false;
            }

            // Merge the six payload bits at the position given by the remaining octet count.
            $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);

            if (--$pending === 0) {
                // End of the sequence: check for illegal sequences and code points.
                if (
                    // From Unicode 3.1, non-shortest form is illegal
                    ($sequence_length === 2 && $code_point < 0x0080)
                    ||
                    ($sequence_length === 3 && $code_point < 0x0800)
                    ||
                    ($sequence_length === 4 && $code_point < 0x10000)
                    ||
                    ($sequence_length > 4)
                    ||
                    // From Unicode 3.2, surrogate characters are illegal.
                    (($code_point & 0xFFFFF800) === 0xD800)
                    ||
                    // Code points outside the Unicode range are illegal.
                    ($code_point > 0x10FFFF)
                ) {
                    return false;
                }

                // reset the state for the next character
                $pending = 0;
                $code_point = 0;
                $sequence_length = 1;
            }
        }

        // a sequence that is still open at the end of the input is invalid
        return $pending === 0;
    }
13257
13258
    /**
13259
     * @param string $str
13260
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
13261
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
13262
     *
13263
     * @psalm-pure
13264
     *
13265
     * @return string
13266
     *
13267
     * @noinspection ReturnTypeCanBeDeclaredInspection
13268
     */
13269 33
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        $common_upper = self::$COMMON_CASE_FOLD['upper'];
        $common_lower = self::$COMMON_CASE_FOLD['lower'];

        // common case folding: the direction depends on $use_lowercase
        $str = $use_lowercase
            ? \str_replace($common_upper, $common_lower, $str)
            : \str_replace($common_lower, $common_upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * Full case folding table, loaded once per request.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is replaced by index 1 when lowercasing, the other way round otherwise
        if ($use_lowercase) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
13311
13312
    /**
13313
     * get data from "/data/*.php"
13314
     *
13315
     * @param string $file
13316
     *
13317
     * @psalm-pure
13318
     *
13319
     * @return array
13320
     *
13321
     * @noinspection ReturnTypeCanBeDeclaredInspection
13322
     */
13323 7
    private static function getData(string $file)
    {
        // data files live in the "data" directory next to this class
        $path = __DIR__ . '/data/' . $file . '.php';

        // the value produced by the included file is what gets returned
        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
13330
13331
    /**
13332
     * @psalm-pure
13333
     *
13334
     * @return true|null
13335
     *
13336
     * @noinspection ReturnTypeCanBeDeclaredInspection
13337
     */
13338 1
    private static function initEmojiData()
    {
        // caches already built: nothing to do
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // order the table by key length, longest keys first
        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // one placeholder token per key, built from the key's crc32 checksum and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
13368
13369
    /**
13370
     * Checks whether mbstring "overloaded" is active on the server.
13371
     *
13372
     * @psalm-pure
13373
     *
13374
     * @return bool
13375
     */
13376
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant there is no overloading to detect
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection DeprecatedIniOptionsInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        // overloaded when the string-function bit is set in the ini value
        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
13389
13390
    /**
13391
     * @param array    $strings
13392
     * @param bool     $remove_empty_values
13393
     * @param int|null $remove_short_values
13394
     *
13395
     * @psalm-pure
13396
     *
13397
     * @return array
13398
     *
13399
     * @noinspection ReturnTypeCanBeDeclaredInspection
13400
     */
13401 2
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // Iterate by value: nothing is written back into $strings, so a by-reference
        // loop would only leave a dangling reference behind.
        foreach ($strings as $str) {
            // drop values whose character length is not above the given threshold
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            // drop values that are empty after trimming
            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        // the result is re-indexed from 0
        return $return;
    }
13431
13432
    /**
13433
     * rxClass
13434
     *
13435
     * @param string $s
13436
     * @param string $class
13437
     *
13438
     * @return string
13439
     *                    *
13440
     * @psalm-pure
13441
     */
13442 36
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * Results already built during this request, keyed by "$s . '_' . $class".
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // Index 0 collects the content of one character class "[...]";
        // any further entries become separate alternatives.
        $class_array = [$class];

        // Iterate by value into a fresh variable: the split result is a temporary
        // (nothing to reference) and the parameter $s must not be overwritten.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen is moved to the front of the character class
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: quoted for use inside a "/"-delimited pattern
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // a single character of three or more bytes is added as it is
                $class_array[0] .= $char;
            } else {
                // anything longer than one character becomes its own alternative
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
13487
13488
    /**
13489
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
13490
     *
13491
     * @param string $names
13492
     * @param string $delimiter
13493
     * @param string $encoding
13494
     *
13495
     * @psalm-pure
13496
     *
13497
     * @return string
13498
     *
13499
     * @noinspection ReturnTypeCanBeDeclaredInspection
13500
     */
13501 1
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // init
        $name_helper_array = \explode($delimiter, $names);
        if ($name_helper_array === false) {
            return '';
        }

        // name parts that stay as they are ('names') and beginnings that
        // prevent the capitalization of a part ('prefixes')
        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // true if $name begins (byte-wise) with at least one of the candidates
        $starts_with_any = static function (string $name, array $candidates): bool {
            foreach ($candidates as $candidate) {
                if (\strncmp($name, $candidate, \strlen($candidate)) === 0) {
                    return true;
                }
            }

            return false;
        };

        // Parts are written back through their index instead of a by-reference loop,
        // so no reference outlives the loop.
        foreach ($name_helper_array as $index => $name) {
            // exact special-case names are never touched
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            // for the "-" delimiter the special names also count as beginnings;
            // the prefix list is checked for every delimiter
            $skip = ($delimiter === '-' && $starts_with_any($name, $special_cases['names']))
                    ||
                    $starts_with_any($name, $special_cases['prefixes']);

            if ($skip) {
                continue;
            }

            $name_helper_array[$index] = self::ucfirst($name, $encoding);
        }

        return \implode($delimiter, $name_helper_array);
    }
13589
    /**
13590
     * Generic case-sensitive transformation for collation matching.
13591
     *
13592
     * @param string $str <p>The input string</p>
13593
     *
13594
     * @psalm-pure
13595
     *
13596
     * @return string|null
13597
     *
13598
     * @noinspection ReturnTypeCanBeDeclaredInspection
13599
     */
13600 6
    private static function strtonatfold(string $str)
    {
        // decompose first (NFD), so that combining marks become separate code points
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
        if ($decomposed === false) {
            return '';
        }

        // then strip every run of non-spacing marks
        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
13613
13614
    /**
13615
     * @param int|string $input
13616
     *
13617
     * @psalm-pure
13618
     *
13619
     * @return string
13620
     *
13621
     * @noinspection ReturnTypeCanBeDeclaredInspection
13622
     */
13623 30
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Build a two-byte sequence with string-wise bit operations:
            // lead byte = (value >> 6) | 0xC0, trail byte = (input & 0x3F) | 0x80.
            // The shift keeps the array key an integer; a float key ("$ordC1 / 64")
            // triggers an "implicit conversion from float" deprecation on PHP 8.1+.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
13652
13653
    /**
13654
     * @param string $str
13655
     *
13656
     * @psalm-pure
13657
     *
13658
     * @return string
13659
     *
13660
     * @noinspection ReturnTypeCanBeDeclaredInspection
13661
     */
13662 9
    private static function urldecode_unicode_helper(string $str)
    {
        // cheap pre-check: without "%u" there is nothing to convert
        if (\strpos($str, '%u') === false) {
            return $str;
        }

        // "%u" followed by three or four hex digits
        $unicode_escape = '/%u([0-9a-fA-F]{3,4})/';
        if (!\preg_match($unicode_escape, $str)) {
            return $str;
        }

        // rewrite every escape as an HTML hex entity, e.g. "%u00e4" => "&#x00e4;"
        return (string) \preg_replace($unicode_escape, '&#x\\1;', $str);
    }
13675
}
13676