Passed
Push — master ( 512e55...80c17a )
by Lars
04:48 queued 01:48
created

UTF8   F

Complexity

Total Complexity 1751

Size/Duplication

Total Lines 13682
Duplicated Lines 0 %

Test Coverage

Coverage 79.79%

Importance

Changes 110
Bugs 53 Features 6
Metric Value
eloc 4230
c 110
b 53
f 6
dl 0
loc 13682
ccs 2965
cts 3716
cp 0.7979
rs 0.8
wmc 1751

272 Methods

Rating   Name   Duplication   Size   Complexity  
B str_camelize() 0 74 10
A str_capitalize_name() 0 8 1
A encode_mimeheader() 0 26 5
F extract_text() 0 175 34
B chr_to_decimal() 0 38 8
A add_bom_to_string() 0 7 2
A ctype_loaded() 0 3 1
D chr() 0 107 19
A chunk_split() 0 3 1
A css_identifier() 0 55 6
A css_stripe_media_queries() 0 6 1
A clean() 0 47 6
A __construct() 0 2 1
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A char_at() 0 7 2
A chars() 0 4 1
A chr_size_list() 0 17 3
A checkForSupport() 0 46 4
A collapse_whitespace() 0 7 2
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A array_change_key_case() 0 23 5
A count_chars() 0 11 1
B between() 0 48 8
A emoji_decode() 0 21 3
A decode_mimeheader() 0 8 3
A emoji_encode() 0 21 3
A decimal_to_chr() 0 5 1
F encode() 0 144 37
A chr_to_hex() 0 11 3
A emoji_from_country_code() 0 17 3
A file_has_bom() 0 8 2
A filter_input() 0 16 3
A is_bom() 0 10 3
A is_hexadecimal() 0 7 2
A get_unique_string() 0 21 3
A has_uppercase() 0 7 2
B get_file_type() 0 60 7
C filter() 0 59 14
A is_html() 0 14 2
A is_alpha() 0 7 2
B get_random_string() 0 54 10
A fix_utf8() 0 30 4
A first_char() 0 14 4
A is_serialized() 0 11 3
A is_uppercase() 0 7 2
A is_ascii() 0 3 1
A is_blank() 0 7 2
D getCharDirection() 0 104 117
A htmlspecialchars() 0 15 3
A filter_var_array() 0 15 2
A has_whitespace() 0 7 2
B is_binary() 0 39 10
A intlChar_loaded() 0 3 1
B is_url() 0 40 7
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
A is_lowercase() 0 7 2
A iconv_loaded() 0 3 1
A filter_var() 0 15 2
A is_empty() 0 3 1
B html_encode() 0 54 11
A is_alphanumeric() 0 7 2
A fix_simple_utf8() 0 32 5
B is_json() 0 26 8
A is_printable() 0 3 1
A int_to_hex() 0 7 2
A has_lowercase() 0 7 2
A is_base64() 0 17 5
A hex_to_int() 0 14 3
A hex_to_chr() 0 4 1
A htmlentities() 0 28 3
A filter_input_array() 0 15 3
A getSupportInfo() 0 13 3
A is_punctuation() 0 3 1
C html_entity_decode() 0 58 13
C file_get_contents() 0 61 12
A is_utf8() 0 13 4
A html_escape() 0 6 1
D is_utf16() 0 76 18
A lcfirst() 0 44 5
A mbstring_overloaded() 0 12 2
A json_loaded() 0 3 1
A lcwords() 0 34 6
D is_utf32() 0 76 18
A json_decode() 0 17 3
A json_encode() 0 13 3
A remove_left() 0 28 4
A strncasecmp() 0 10 1
A remove_right() 0 25 4
A str_substr_after_first_separator() 0 28 6
A max() 0 14 3
A parse_str() 0 18 4
A str_contains() 0 15 3
B str_to_lines() 0 28 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 25 6
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
C utf8_decode() 0 59 13
A ltrim() 0 26 5
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 70 14
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A string() 0 18 4
B str_obfuscate() 0 47 8
D normalize_encoding() 0 147 16
B rxClass() 0 44 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 170 7
A normalize_whitespace() 0 11 1
A str_starts_with() 0 16 4
A str_humanize() 0 15 1
C substr_count_in_byte() 0 55 15
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 26 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 35 5
A getData() 0 6 1
B strtolower() 0 58 10
A urldecode() 0 35 4
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 123 27
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A str_split_array() 0 17 2
A ws() 0 3 1
A str_replace_first() 0 20 2
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A to_boolean() 0 35 5
C stristr() 0 79 17
B strwidth() 0 43 8
A trim() 0 26 5
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 12 3
A str_ireplace() 0 31 5
A str_replace_ending() 0 24 6
A string_has_bom() 0 9 3
B strtr() 0 41 11
B str_contains_all() 0 22 9
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 71 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A rawurldecode() 0 35 4
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 14 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 14
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A str_replace() 0 18 1
A substr_iright() 0 15 4
A replace() 0 11 2
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 134 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
F strrpos() 0 136 31
A remove_html_breaks() 0 3 1
A showSupport() 0 16 3
A remove_invisible_characters() 0 11 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 43 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 16 4
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
B str_snakeize() 0 57 6
A str_sort() 0 16 3
A to_utf8() 0 15 3
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
B str_limit_after_word() 0 51 11
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
C ord() 0 68 16
B to_string() 0 27 8
A strtonatfold() 0 11 2
C strcspn() 0 48 12
A fixStrCaseHelper() 0 41 5
C str_split_pattern() 0 54 13
D strstr() 0 107 21
A str_isubstr_first() 0 25 4
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 137 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A replace_diamond_question_mark() 0 43 5
D is_utf8_string() 0 133 28
B str_delimit() 0 31 8
A to_utf8_convert_helper() 0 28 5
B strtoupper() 0 58 10
A min() 0 14 3
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 28 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 151 33
A str_shuffle() 0 33 6
A strcmp() 0 11 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
     * Byte order mark => length of that mark in bytes.
     *
     * Every raw BOM is followed by a second entry: the same mark as it looks
     * after having been mis-read as "WINDOWS-1252" and stored as UTF-8 again.
     * The length of each key in bytes equals the value assigned to it.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // written as byte escapes ("ï»¿") so the key cannot get lost when the file is re-encoded
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
31
32
    /**
33
     * Numeric code point => UTF-8 Character
34
     *
35
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
36
     *
37
     * @var array<int, string>
38
     */
39
    private static $WHITESPACE = [
40
        // NULL Byte
41
        0 => "\x0",
42
        // Tab
43
        9 => "\x9",
44
        // New Line
45
        10 => "\xa",
46
        // Vertical Tab
47
        11 => "\xb",
48
        // Carriage Return
49
        13 => "\xd",
50
        // Ordinary Space
51
        32 => "\x20",
52
        // NO-BREAK SPACE
53
        160 => "\xc2\xa0",
54
        // OGHAM SPACE MARK
55
        5760 => "\xe1\x9a\x80",
56
        // MONGOLIAN VOWEL SEPARATOR
57
        6158 => "\xe1\xa0\x8e",
58
        // EN QUAD
59
        8192 => "\xe2\x80\x80",
60
        // EM QUAD
61
        8193 => "\xe2\x80\x81",
62
        // EN SPACE
63
        8194 => "\xe2\x80\x82",
64
        // EM SPACE
65
        8195 => "\xe2\x80\x83",
66
        // THREE-PER-EM SPACE
67
        8196 => "\xe2\x80\x84",
68
        // FOUR-PER-EM SPACE
69
        8197 => "\xe2\x80\x85",
70
        // SIX-PER-EM SPACE
71
        8198 => "\xe2\x80\x86",
72
        // FIGURE SPACE
73
        8199 => "\xe2\x80\x87",
74
        // PUNCTUATION SPACE
75
        8200 => "\xe2\x80\x88",
76
        // THIN SPACE
77
        8201 => "\xe2\x80\x89",
78
        // HAIR SPACE
79
        8202 => "\xe2\x80\x8a",
80
        // LINE SEPARATOR
81
        8232 => "\xe2\x80\xa8",
82
        // PARAGRAPH SEPARATOR
83
        8233 => "\xe2\x80\xa9",
84
        // NARROW NO-BREAK SPACE
85
        8239 => "\xe2\x80\xaf",
86
        // MEDIUM MATHEMATICAL SPACE
87
        8287 => "\xe2\x81\x9f",
88
        // HALFWIDTH HANGUL FILLER
89
        65440 => "\xef\xbe\xa0",
90
        // IDEOGRAPHIC SPACE
91
        12288 => "\xe3\x80\x80",
92
    ];
93
94
    /**
95
     * @var array<string, string>
96
     */
97
    private static $WHITESPACE_TABLE = [
98
        'SPACE'                     => "\x20",
99
        'NO-BREAK SPACE'            => "\xc2\xa0",
100
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
101
        'EN QUAD'                   => "\xe2\x80\x80",
102
        'EM QUAD'                   => "\xe2\x80\x81",
103
        'EN SPACE'                  => "\xe2\x80\x82",
104
        'EM SPACE'                  => "\xe2\x80\x83",
105
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
106
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
107
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
108
        'FIGURE SPACE'              => "\xe2\x80\x87",
109
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
110
        'THIN SPACE'                => "\xe2\x80\x89",
111
        'HAIR SPACE'                => "\xe2\x80\x8a",
112
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
113
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
114
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
115
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
116
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
117
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
118
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
119
    ];
120
121
    /**
122
     * @var array
123
     *
124
     * @phpstan-var array{upper: string[], lower: string[]}
125
     */
126
    private static $COMMON_CASE_FOLD = [
127
        'upper' => [
128
            'µ',
129
            'ſ',
130
            "\xCD\x85",
131
            'ς',
132
            'ẞ',
133
            "\xCF\x90",
134
            "\xCF\x91",
135
            "\xCF\x95",
136
            "\xCF\x96",
137
            "\xCF\xB0",
138
            "\xCF\xB1",
139
            "\xCF\xB5",
140
            "\xE1\xBA\x9B",
141
            "\xE1\xBE\xBE",
142
        ],
143
        'lower' => [
144
            'μ',
145
            's',
146
            'ι',
147
            'σ',
148
            'ß',
149
            'β',
150
            'θ',
151
            'φ',
152
            'π',
153
            'κ',
154
            'ρ',
155
            'ε',
156
            "\xE1\xB9\xA1",
157
            'ι',
158
        ],
159
    ];
160
161
    /**
162
     * @var array
163
     *
164
     * @phpstan-var array<string, mixed>
165
     */
166
    private static $SUPPORT = [];
167
168
    /**
169
     * @var string[]|null
170
     *
171
     * @phpstan-var array<string, string>|null
172
     */
173
    private static $BROKEN_UTF8_FIX;
174
175
    /**
176
     * @var string[]|null
177
     *
178
     * @phpstan-var array<int, string>|null
179
     */
180
    private static $WIN1252_TO_UTF8;
181
182
    /**
183
     * @var string[]|null
184
     *
185
     * @phpstan-var array<int ,string>|null
186
     */
187
    private static $INTL_TRANSLITERATOR_LIST;
188
189
    /**
190
     * @var string[]|null
191
     *
192
     * @phpstan-var array<string>|null
193
     */
194
    private static $ENCODINGS;
195
196
    /**
197
     * @var int[]|null
198
     *
199
     * @phpstan-var array<string ,int>|null
200
     */
201
    private static $ORD;
202
203
    /**
204
     * @var string[]|null
205
     *
206
     * @phpstan-var array<string, string>|null
207
     */
208
    private static $EMOJI;
209
210
    /**
211
     * @var string[]|null
212
     *
213
     * @phpstan-var array<string>|null
214
     */
215
    private static $EMOJI_VALUES_CACHE;
216
217
    /**
218
     * @var string[]|null
219
     *
220
     * @phpstan-var array<string>|null
221
     */
222
    private static $EMOJI_KEYS_CACHE;
223
224
    /**
225
     * @var string[]|null
226
     *
227
     * @phpstan-var array<string>|null
228
     */
229
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
230
231
    /**
232
     * @var string[]|null
233
     *
234
     * @phpstan-var array<int, string>|null
235
     */
236
    private static $CHR;
237
238
    /**
     * Creates an instance; the constructor itself performs no work.
     */
    public function __construct()
    {
        // intentionally empty
    }
244
245
    /**
     * Fetch a single character by its zero-based position: $str[1] like functionality for multi-byte text.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of the character to return, a negative value yields an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character, or an empty string if there is nothing to return.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $is_usable_request = $str !== '' && $pos >= 0;
        if (!$is_usable_request) {
            return '';
        }

        // the native call gets no charset argument, so it works with the internal mbstring encoding
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
271
272
    /**
     * Makes sure the string starts with a byte order mark.
     *
     * INFO: A string for which UTF8::string_has_bom() reports a BOM is handed back untouched,
     *       otherwise the result of UTF8::bom() is put in front of it.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The output string that contains BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
294
295
    /**
     * Changes the case of all keys in an array, the values are copied over unchanged.
     *
     * If two keys become equal after the conversion, the later element overwrites the earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default), every other value
     *                                       is handled like <strong>CASE_LOWER</strong></p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>
     *                              <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // anything that is not CASE_UPPER falls back to CASE_LOWER
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // iterate by value: nothing is written back to $array and no reference is left dangling
        foreach ($array as $key => $value) {
            // PHP stores numeric string keys as integers, the case helpers are called with a string
            $key = (string) $key;

            $key = $to_upper
                ? self::strtoupper($key, $encoding)
                : self::strtolower($key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
332
333
    /**
     * Extracts the text enclosed by the first $start found at / after $offset
     * and the first $end that follows it.
     *
     * An empty string is returned if one of the delimiters is missing or if
     * nothing stands between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // every other charset goes through the helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $found_at = self::strpos($str, $start, $offset, $encoding);
            if ($found_at === false) {
                return '';
            }

            $from = $found_at + (int) self::strlen($start, $encoding);
            $to = self::strpos($str, $end, $from, $encoding);
            if ($to === false || $to === $from) {
                return '';
            }

            return (string) self::substr($str, $from, $to - $from, $encoding);
        }

        // fast path: native "mb_" functions
        $found_at = \mb_strpos($str, $start, $offset);
        if ($found_at === false) {
            return '';
        }

        $from = $found_at + (int) \mb_strlen($start);
        $to = \mb_strpos($str, $end, $from);
        if ($to === false || $to === $from) {
            return '';
        }

        return (string) \mb_substr($str, $from, $to - $from);
    }
398
399
    /**
     * Convert binary into a string.
     *
     * The digits are decoded in groups of eight bits, so the length of the input is not limited
     * by the precision of a float. Leading zeros carry no value and are dropped, the remaining
     * digits are padded on the left to whole bytes. Characters other than "0" and "1" are ignored.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded bytes, or an empty string if the input is empty or has the value zero.</p>
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        // keep the binary digits only and drop the leading zeros
        $bits = \ltrim((string) \preg_replace('/[^01]+/', '', (string) $bin), '0');
        if ($bits === '') {
            return '';
        }

        // pad on the left: the most significant byte may be written with fewer than eight digits
        $missing = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $missing) . $bits;

        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
425
426
    /**
     * Returns the byte order mark of UTF-8.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark.</p>
     */
    public static function bom(): string
    {
        // the three bytes EF BB BF
        return "\xEF\xBB\xBF";
    }
442
443
    /**
     * Forwards both arguments to UTF8::chr_map() and returns its result.
     *
     * @alias of UTF8::chr_map()
     *
     * @param callable $callback
     * @param string   $str
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
459
460
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * The index is handed to the substring function as it is, it is not checked here.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $char = $encoding !== 'UTF-8'
            ? self::substr($str, $index, 1, $encoding)
            : \mb_substr($str, $index, 1);

        return (string) $char;
    }
480
481
    /**
     * Splits the string into its characters, the work is done by UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $chars */
        $chars = self::str_split($str);

        return $chars;
    }
496
497
    /**
     * Detects the UTF-8 related features of the server environment and stores
     * the findings in self::$SUPPORT.
     *
     * The detection is done only once. If mbstring is loaded, its internal and its
     * regex encoding are set to UTF-8; if the symfony polyfill is used, the
     * internal encoding is set to UTF-8.
     *
     * @return true|null
     *                   <p><strong>true</strong> if the detection was done by this call,
     *                   <strong>null</strong> if it had been done before.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // the marker is written before the detection starts, every later call ends here
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();

        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
551
552
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * The result is kept in a static cache per code point and encoding. Code points
     * below 0x80 are read from the byte table, everything else is built via
     * "IntlChar" if it is available, otherwise by composing the UTF-8 bytes by hand.
     * If an encoding other than UTF-8 is requested, the character is converted
     * with UTF8::encode() before it is returned.
     *
     * INFO: opposite to UTF8::ord()
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null on failure or empty input
     *                     (no integer, not positive or greater than 0x10FFFF).</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // the parameter is not typed, so the check for an integer is still needed;
        // 0x10FFFF is the last code point, the 4-byte branch below cannot encode anything larger
        if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // only for "simple"-chars: the table holds single bytes, and a single byte
        // is a complete UTF-8 character only below 0x80 (U+0080 already needs two bytes)
        if ($code_point < 0x80) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
675
676
    /**
     * Runs the callback once for every character of the string and collects the results.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
696
697
    /**
     * Returns the number of bytes of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An array of byte lengths of each character.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \array_map('\strlen', self::str_split($str));
        }

        // "mb_" is available if overload is used, so use it ...
        $byte_length = static function (string $data): int {
            return \mb_strlen($data, 'CP850'); // 8-BIT
        };

        return \array_map($byte_length, self::str_split($str));
    }
732
733
    /**
734
     * Get a decimal code representation of a specific character.
735
     *
736
     * INFO: opposite to UTF8::decimal_to_chr()
737
     *
738
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
739
     *
740
     * @param string $char <p>The input character.</p>
741
     *
742
     * @psalm-pure
743
     *
744
     * @return int
745
     */
746 5
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no first character. Mirror native ord('') and
        // report 0 instead of failing on unpack() or on $char[0] below.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);

            // unpack('V') needs a complete 32-bit unit; isset() on offset 3
            // verifies that at least four bytes came back from iconv().
            if ($chr_tmp !== false && isset($chr_tmp[3])) {
                $unpacked = \unpack('V', $chr_tmp);
                if ($unpacked !== false) {
                    // Only the first character of the input is reported.
                    return (int) $unpacked[1];
                }
            }
        }

        // Manual decoding: inspect the lead byte to find the sequence length.
        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 2; $i <= $bytes; ++$i) {
            if (!isset($char[$i - 1])) {
                // Truncated sequence: stop instead of reading past the end
                // of the string.
                break;
            }

            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
785
786
    /**
787
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
788
     *
789
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
790
     *
791
     * @param int|string $char   <p>The input character</p>
792
     * @param string     $prefix [optional]
793
     *
794
     * @psalm-pure
795
     *
796
     * @return string
797
     *                <p>The code point encoded as U+xxxx.</p>
798
     */
799 2
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        // Nothing to convert.
        if ($char === '') {
            return '';
        }

        // The HTML entity for NUL is converted as if it were an empty string.
        $input = $char === '&#0;' ? '' : $char;

        $code_point = self::ord((string) $input);

        return self::int_to_hex($code_point, $prefix);
    }
811
812
    /**
813
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
814
     *
815
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
816
     *
817
     * @param string $body         <p>The original string to be split.</p>
818
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
819
     * @param string $end          [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
820
     *
821
     * @psalm-pure
822
     *
823
     * @return string
824
     *                <p>The chunked string.</p>
825
     */
826 4
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        // Split via the class's own str_split(), then glue the pieces
        // together with the requested line ending (none after the last chunk).
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
830
831
    /**
832
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
833
     *
834
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
835
     *
836
     * @param string $str                                     <p>The string to be sanitized.</p>
837
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
838
     *                                                        UTF-BOM.</p>
839
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
840
     *                                                        whitespace.</p>
841
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
842
     *                                                        Word chars e.g.: "…"
843
     *                                                        => "..."</p>
844
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
845
     *                                                        in
846
     *                                                        combination with
847
     *                                                        $normalize_whitespace</p>
848
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
849
     *                                                        question mark e.g.: "�"</p>
850
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
851
     *                                                        invisible characters e.g.: "\0"</p>
852
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you want to remove
853
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
854
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
855
     *                                                        </p>
856
     *
857
     * @psalm-pure
858
     *
859
     * @return string
860
     *                <p>A clean UTF-8 encoded string.</p>
861
     */
862 90
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings
        //
        // Group 1 captures runs of structurally well-formed sequences (at most
        // 100 per match); groups 2 and 3 match a single stray byte. Replacing
        // every match with "$1" keeps the runs and drops the stray bytes.
        $byte_sequence_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($byte_sequence_pattern, '$1', $str);

        // The optional steps below run in a fixed order.
        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
910
911
    /**
912
     * Clean-up a string and show only printable UTF-8 chars at the end  + fix UTF-8 encoding.
913
     *
914
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
915
     *
916
     * @param string $str <p>The input string.</p>
917
     *
918
     * @psalm-pure
919
     *
920
     * @return string
921
     */
922 33
    public static function cleanup($str): string
    {
        // The parameter is untyped, so normalise it to a string first.
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $repaired = self::fix_simple_utf8($input);

        // Flags handed to clean(), spelled out in parameter order.
        $remove_bom = true;
        $normalize_whitespace = true;
        $normalize_msword = false;
        $keep_non_breaking_space = true;
        $replace_diamond_question_mark = true;

        // clean() strips the non UTF-8 bytes itself; removal of invisible
        // characters (e.g. "\0") stays enabled through its default value.
        return self::clean(
            $repaired,
            $remove_bom,
            $normalize_whitespace,
            $normalize_msword,
            $keep_non_breaking_space,
            $replace_diamond_question_mark
        );
    }
948
949
    /**
950
     * Accepts a string or an array of strings and returns an array of Unicode code points.
951
     *
952
     * INFO: opposite to UTF8::string()
953
     *
954
     * EXAMPLE: <code>
955
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
956
     * // ... OR ...
957
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
958
     * </code>
959
     *
960
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
961
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
962
     *                                     default, code points will be returned as integers.</p>
963
     *
964
     * @psalm-pure
965
     *
966
     * @return int[]|string[]
967
     *                        <p>
968
     *                        The array of code points:<br>
969
     *                        int[] for $use_u_style === false<br>
970
     *                        string[] for $use_u_style === true<br>
971
     *                        </p>
972
     */
973 12
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A string is first broken up into its individual characters.
        if (\is_string($arg)) {
            $arg = self::str_split($arg);
        }

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($arg) || $arg === []) {
            // Neither a string nor an array, or simply nothing to convert.
            return [];
        }

        // Map every entry to its integer code point (array keys are kept).
        $code_points = \array_map([self::class, 'ord'], $arg);

        if (!$use_u_style) {
            return $code_points;
        }

        // Format each code point with int_to_hex() and its default prefix.
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1010
1011
    /**
1012
     * Trims the string and replaces consecutive whitespace characters with a
1013
     * single space. This includes tabs and newline characters, as well as
1014
     * multibyte whitespace such as the thin space and ideographic space.
1015
     *
1016
     * @param string $str <p>The input string.</p>
1017
     *
1018
     * @psalm-pure
1019
     *
1020
     * @return string
1021
     *                <p>A string with trimmed $str and condensed whitespace.</p>
1022
     */
1023 13
    public static function collapse_whitespace(string $str): string
    {
        $whitespace_run = '[[:space:]]+';

        if (self::$SUPPORT['mbstring'] === true) {
            $collapsed = (string) \mb_ereg_replace($whitespace_run, ' ', $str);
        } else {
            // Without mbstring, use the class's own regex helper.
            $collapsed = self::regex_replace($str, $whitespace_run, ' ');
        }

        // Finally drop leading and trailing whitespace.
        return \trim($collapsed);
    }
1031
1032
    /**
1033
     * Returns count of characters used in a string.
1034
     *
1035
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
1036
     *
1037
     * @param string $str                     <p>The input string.</p>
1038
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
1039
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use "mb_" functions.</p>
1040
     *
1041
     * @psalm-pure
1042
     *
1043
     * @return int[]
1044
     *               <p>An associative array of Character as keys and
1045
     *               their count as values.</p>
1046
     */
1047 19
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split into pieces of length 1, then tally how often each occurs.
        $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($characters);
    }
1061
1062
    /**
1063
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
1064
     *
1065
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
1066
     *
1067
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
1068
     *
1069
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
1070
     * @param string[] $filter
1071
     * @param bool     $strip_tags
1072
     * @param bool     $strtolower
1073
     *
1074
     * @psalm-pure
1075
     *
1076
     * @return string
1077
     *
1078
     * @phpstan-param array<string,string> $filter
1079
     */
1080 1
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank input gets an auto-generated identifier,
        // anything else is run through clean() first.
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // The upstream code prefers str_replace() over strtr() here. To keep
        // '__' intact it is temporarily swapped for the placeholder '##',
        // unless the caller defined '__' as a filter key.
        $protected_underscores = 0;
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $protected_underscores);
        }

        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Restore the placeholder only if a '__' was actually swapped above.
        if ($protected_underscores > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Characters kept by the pattern below:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - characters from U+00A1 up to U+FFFF
        // Every other character is stripped.
        $str = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $str);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $str = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $str);

        return \trim($str, '-');
    }
1136
1137
    /**
1138
     * Remove css media-queries.
1139
     *
1140
     * @param string $str
1141
     *
1142
     * @psalm-pure
1143
     *
1144
     * @return string
1145
     */
1146 1
    public static function css_stripe_media_queries(string $str): string
    {
        // Matches an "@media ..." rule including its nested "{ ... }" block
        // and any trailing whitespace.
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($media_query_pattern, '', $str);

        return (string) $stripped;
    }
1154
1155
    /**
1156
     * Checks whether ctype is available on the server.
1157
     *
1158
     * @psalm-pure
1159
     *
1160
     * @return bool
1161
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
1162
     *
1163
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
1164
     */
1165
    public static function ctype_loaded(): bool
    {
        // Ask the runtime whether the "ctype" extension is loaded.
        $is_available = \extension_loaded('ctype');

        return $is_available;
    }
1169
1170
    /**
1171
     * Converts an int value into a UTF-8 character.
1172
     *
1173
     * INFO: opposite to UTF8::chr_to_decimal()
1174
     *
1175
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
1176
     *
1177
     * @param int|string $int
1178
     *
1179
     * @phpstan-param int|numeric-string $int
1180
     *
1181
     * @psalm-pure
1182
     *
1183
     * @return string
1184
     */
1185 20
    public static function decimal_to_chr($int): string
    {
        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        //
        // NOTE(review): mbstring's "HTML-ENTITIES" encoding is deprecated as
        // of PHP 8.2; a replacement should be planned.
        $char = \mb_convert_encoding('&#' . $int . ';', 'UTF-8', 'HTML-ENTITIES');

        // mb_convert_encoding() is declared as returning array|string|false;
        // only a string satisfies this method's return type, so anything else
        // is turned into an empty string.
        return \is_string($char) ? $char : '';
    }
1191
1192
    /**
1193
     * Decodes a MIME header field
1194
     *
1195
     * @param string $str
1196
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1197
     *
1198
     * @psalm-pure
1199
     *
1200
     * @return false|string
1201
     *                      <p>A decoded MIME field on success,
1202
     *                      or false if an error occurs during the decoding.</p>
1203
     */
1204 2
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // The two charset names below are used as-is; every other name is
        // normalised first (with 'UTF-8' passed as the fallback argument).
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        $decoded = \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);

        return $decoded;
    }
1213
1214
    /**
1215
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
1216
     *
1217
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
1218
     *
1219
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
1220
     *
1221
     * @return string
1222
     *                <p>Emoji or empty string on error.</p>
1223
     */
1224 1
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        if ($country_code_iso_3166_1 === '') {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        // Only the letters A-Z map onto regional indicator symbols. Reject
        // everything else (wrong length, digits, multi-byte characters, ...)
        // so that invalid input yields the documented empty string instead of
        // unrelated code points.
        if (\preg_match('/\A[A-Z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        // U+1F1E6 is "REGIONAL INDICATOR SYMBOL LETTER A"; 0x41 is ASCII "A".
        $flagOffset = 0x1F1E6;
        $asciiOffset = 0x41;

        // Each letter is shifted into the regional indicator block; the two
        // resulting symbols together form the flag emoji. The "?? ''" guards
        // are kept from the original code in case chr() yields null.
        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1242
1243
    /**
1244
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1245
     *
1246
     * INFO: opposite to UTF8::emoji_encode()
1247
     *
1248
     * EXAMPLE: <code>
1249
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
1250
     * //
1251
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
1252
     * </code>
1253
     *
1254
     * @param string $str                            <p>The input string.</p>
1255
     * @param bool   $use_reversible_string_mappings [optional] <p>
1256
     *                                               When <b>TRUE</b>, we use a reversible string mapping
1257
     *                                               between "emoji_encode" and "emoji_decode".</p>
1258
     *
1259
     * @psalm-pure
1260
     *
1261
     * @return string
1262
     */
1263 9
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Lazily load the emoji lookup tables on first use.
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the placeholder list that matches the requested mapping mode.
        $placeholders = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        // Swap every placeholder for the entry at the same position in the
        // values table.
        return (string) \str_replace(
            $placeholders,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1286
1287
    /**
1288
     * Encode a string with emoji chars into a non-emoji string.
1289
     *
1290
     * INFO: opposite to UTF8::emoji_decode()
1291
     *
1292
     * EXAMPLE: <code>
1293
     * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
1294
     * //
1295
     * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
1296
     * </code>
1297
     *
1298
     * @param string $str                            <p>The input string</p>
1299
     * @param bool   $use_reversible_string_mappings [optional] <p>
1300
     *                                               when <b>TRUE</b>, we use a reversible string mapping
1301
     *                                               between "emoji_encode" and "emoji_decode"</p>
1302
     *
1303
     * @psalm-pure
1304
     *
1305
     * @return string
1306
     */
1307 12
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Lazily load the emoji lookup tables on first use.
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the placeholder list that matches the requested mapping mode.
        $placeholders = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        // Swap every entry of the values table for the placeholder at the
        // same position.
        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            $placeholders,
            $str
        );
    }
1330
1331
    /**
1332
     * Encode a string with a new charset-encoding.
1333
     *
1334
     * INFO:  This function will also try to fix broken / double encoding,
1335
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
1336
     *
1337
     * EXAMPLE: <code>
1338
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
1339
     * //
1340
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
1341
     * //
1342
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
1343
     * //
1344
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
1345
     * </code>
1346
     *
1347
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1348
     * @param string $str                           <p>The input string</p>
1349
     * @param bool   $auto_detect_the_from_encoding [optional] <p>Force the new encoding (we try to fix broken / double
1350
     *                                              encoding for UTF-8)<br> otherwise we auto-detect the current
1351
     *                                              string-encoding</p>
1352
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1353
     *                                              An empty string will trigger the autodetect anyway.</p>
1354
     *
1355
     * @psalm-pure
1356
     *
1357
     * @return string
1358
     *
1359
     * @psalm-suppress InvalidReturnStatement
1360
     */
1361 28
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // Nothing to convert, or no target given.
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // 'UTF-8' and 'CP850' are used as-is; other names are normalised.
        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Source and target are explicitly the same: nothing to do.
        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        // --- pseudo encodings: JSON, BASE64, HTML-ENTITIES -------------------

        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            // NOTE(review): the decoded value is assumed to be a string;
            // json_decode() is defined elsewhere - confirm its return type.
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // In strict mode base64_decode() returns false for invalid input.
            // Keep the original string in that case, so that no boolean is
            // handed to the string-only helpers below.
            $decoded = \base64_decode($str, true);
            if ($decoded !== false) {
                $str = $decoded;
            }
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // --- detect the source encoding ---------------------------------------

        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        if ($from_encoding_auto_detected !== false) {
            // A detected encoding takes precedence over the given one.
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        // --- conversions handled by the class's own helpers -------------------

        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        // --- generic conversion: mbstring first, iconv second -----------------

        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            // A falsy result falls through to the iconv attempt below.
            if ($str_encoded) {
                \assert(\is_string($str_encoded));

                return $str_encoded;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $return = @\iconv($from_encoding, $to_encoding, $str);
        if ($return !== false) {
            return $return;
        }

        // Last resort: hand back the string unconverted.
        return $str;
    }
1506
1507
    /**
1508
     * @param string $str
1509
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
1510
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
1511
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
1512
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
1513
     * @param int    $indent            [optional] <p>Set the max length indent.</p>
1514
     *
1515
     * @psalm-pure
1516
     *
1517
     * @return false|string
1518
     *                      <p>An encoded MIME field on success,
1519
     *                      or false if an error occurs during the encoding.</p>
1520
     */
1521 1
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // 'UTF-8' and 'CP850' are used as-is; other charset names are
        // normalised first (with 'UTF-8' passed as the fallback argument).
        $used_as_is = ['UTF-8', 'CP850'];

        if (!\in_array($from_charset, $used_as_is, true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, $used_as_is, true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $options = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        // The field name is left empty; only the value is encoded.
        return \iconv_mime_encode('', $str, $options);
    }
1550
1551
    /**
     * Creates an extract from a sentence: if the search string is found, the extract is
     * positioned around it; otherwise the beginning of the text is used.
     *
     * Cuts are made at a ' ' or '.' boundary and the skipped parts are marked with
     * $replacer_for_skipped_text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // characters stripped from the cut edges of the extract
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // --- no search string: cut the beginning of the text ---
        if ($search === '') {
            // NOTE(review): the strpos variants return false for a missing needle; min() then
            // yields false, which is cast to 0. A cut position is therefore only found when
            // BOTH ' ' and '.' occur at or after the offset - confirm that this is intended.
            if ($encoding === 'UTF-8') {
                if ($length > 0) {
                    $string_length = (int) \mb_strlen($str);
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
                } else {
                    $end = 0;
                }

                $pos = (int) \min(
                    \mb_strpos($str, ' ', $end),
                    \mb_strpos($str, '.', $end)
                );
            } else {
                if ($length > 0) {
                    $string_length = (int) self::strlen($str, $encoding);
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
                } else {
                    $end = 0;
                }

                $pos = (int) \min(
                    self::strpos($str, ' ', $end, $encoding),
                    self::strpos($str, '.', $end, $encoding)
                );
            }

            if ($pos) {
                if ($encoding === 'UTF-8') {
                    $str_sub = \mb_substr($str, 0, $pos);
                } else {
                    $str_sub = self::substr($str, 0, $pos, $encoding);
                }

                if ($str_sub === false) {
                    return '';
                }

                return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
            }

            // no usable cut position: return the text unchanged
            return $str;
        }

        // --- search string given: locate it (case-insensitive) ---
        // A missing match and a match at position 0 both end up as 0 here.
        if ($encoding === 'UTF-8') {
            $word_position = (int) \mb_stripos($str, $search);
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // move the start back to the last ' ' or '.' before $half_side
        $pos_start = 0;
        if ($half_side > 0) {
            if ($encoding === 'UTF-8') {
                $half_text = \mb_substr($str, 0, $half_side);
            } else {
                $half_text = self::substr($str, 0, $half_side, $encoding);
            }
            if ($half_text !== false) {
                if ($encoding === 'UTF-8') {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($word_position && $half_side > 0) {
            // the match lies far enough inside the text: cut on the left (and maybe on the right)
            $offset = $pos_start + $length - 1;
            $real_length = (int) self::strlen($str, $encoding);

            if ($offset > $real_length) {
                $offset = $real_length;
            }

            if ($encoding === 'UTF-8') {
                $pos_end = (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                ) - $pos_start;
            } else {
                $pos_end = (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                ) - $pos_start;
            }

            if (!$pos_end || $pos_end <= 0) {
                // no right-hand cut position: keep everything from $pos_start to the end
                if ($encoding === 'UTF-8') {
                    $str_sub = \mb_substr($str, $pos_start, (int) \mb_strlen($str));
                } else {
                    $str_sub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
                }
                if ($str_sub !== false) {
                    $extract = $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
                } else {
                    $extract = '';
                }
            } else {
                // cut on both sides
                if ($encoding === 'UTF-8') {
                    $str_sub = \mb_substr($str, $pos_start, $pos_end);
                } else {
                    $str_sub = self::substr($str, $pos_start, $pos_end, $encoding);
                }
                if ($str_sub !== false) {
                    $extract = $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
                } else {
                    $extract = '';
                }
            }
        } else {
            // the extract starts at the beginning of the text: cut on the right only
            $offset = $length - 1;
            $true_length = (int) self::strlen($str, $encoding);

            if ($offset > $true_length) {
                $offset = $true_length;
            }

            if ($encoding === 'UTF-8') {
                $pos_end = (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            } else {
                $pos_end = (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                );
            }

            if ($pos_end) {
                if ($encoding === 'UTF-8') {
                    $str_sub = \mb_substr($str, 0, $pos_end);
                } else {
                    $str_sub = self::substr($str, 0, $pos_end, $encoding);
                }
                if ($str_sub !== false) {
                    $extract = \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
                } else {
                    $extract = '';
                }
            } else {
                $extract = $str;
            }
        }

        return $extract;
    }
1740
1741
    /**
     * Reads entire file into a string.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed through to "file_get_contents()" to trigger
     *                                        the include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is set,
     *                                        a context with that "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached. Negative values are treated as 0.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // init
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
        $filename = Bootup::filter_sanitize_string_polyfill($filename);
        if ($filename === false) {
            return false;
        }

        // only build a default context when the caller did not supply one
        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        if (\is_int($max_length)) {
            if ($max_length < 0) {
                $max_length = 0;
            }

            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $max_length);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        // convert text data only; data detected as UTF-16 / UTF-32 is converted
        // even if the binary check flags it
        if ($convert_to_utf8) {
            if (
                !self::is_binary($data, true)
                ||
                self::is_utf16($data, false) !== false
                ||
                self::is_utf32($data, false) !== false
            ) {
                $data = self::encode('UTF-8', $data, false, $from_encoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1846
1847
    /**
     * Checks if a file starts with BOM (Byte Order Mark) character.
     *
     * The whole file is read and the content is handed to "string_has_bom()".
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $file_content = \file_get_contents($file_path);
        if ($file_content === false) {
            throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
        }

        return self::string_has_bom($file_content);
    }
1870
1871
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are processed recursively (element by element / property by property),
     * strings are normalized, every other type is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var                <p>The value to normalize.</p>
     * @param int                 $normalization_form <p>One of the "\Normalizer" form constants.</p>
     * @param string              $leading_combining  <p>Prefix added in front of a leading combining mark.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        switch (\gettype($var)) {
            case 'object':
            case 'array':
                foreach ($var as &$v) {
                    $v = self::filter($v, $normalization_form, $leading_combining);
                }
                // drop the dangling reference left behind by the by-reference loop
                unset($v);

                break;
            case 'string':

                if (\strpos($var, "\r") !== false) {
                    $var = self::normalize_line_ending($var);
                }

                // pure ASCII needs neither normalization nor conversion
                if (!ASCII::is_ascii($var)) {
                    if (\Normalizer::isNormalized($var, $normalization_form)) {
                        // truthy placeholder: "already normalized, keep $var as it is"
                        $n = '-';
                    } else {
                        $n = \Normalizer::normalize($var, $normalization_form);

                        if ($n && isset($n[0])) {
                            $var = $n;
                        } else {
                            // normalization gave no usable result: re-encode to UTF-8 instead
                            $var = self::encode('UTF-8', $var);
                        }
                    }

                    \assert(\is_string($var));
                    if (
                        $n
                        &&
                        $var[0] >= "\x80"
                        &&
                        isset($n[0], $leading_combining[0])
                        &&
                        \preg_match('/^\\p{Mn}/u', $var)
                    ) {
                        // Prevent leading combining chars
                        // for NFC-safe concatenations.
                        $var = $leading_combining . $var;
                    }
                }

                break;
            default:
                // nothing
        }

        // the self-assignment only exists to carry the phpstan type annotation
        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1948
1949
    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name and optionally filters it.
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW)); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      <b>NULL</b> means "use the defaults of the native function".
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // An omitted $options argument defaults to null, so this single check covers both the
        // "not passed" and the "explicit null" case; null is never forwarded to the native function.
        if ($options === null) {
            $var = \filter_input($type, $variable_name, $filter);
        } else {
            $var = \filter_input($type, $variable_name, $filter, $options);
        }

        return self::filter($var);
    }
2004
2005
    /**
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables and optionally filters them.
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => 'FILTER_UNSAFE_RAW')); // array('bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int            $type       <p>
     *                                   One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                   <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                   <b>INPUT_ENV</b>.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a filter type, or an array
     *                                   optionally specifying the filter, flags and options. If the value is an
     *                                   array, valid keys are filter which specifies the
     *                                   filter type,
     *                                   flags which specifies any flags that apply to the
     *                                   filter, and options which specifies any options that
     *                                   apply to the filter.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all values in the
     *                                   input array are filtered by this filter.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   Only forwarded when $definition is given.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // An omitted $definition defaults to null, so this single check covers both the
        // "not passed" and the "explicit null" case; null is never forwarded to the native function.
        if ($definition === null) {
            $a = \filter_input_array($type);
        } else {
            $a = \filter_input_array($type, $definition, $add_empty);
        }

        return self::filter($a);
    }
2066
2067
    /**
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter.
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        <b>NULL</b> means "use the defaults of the native function".
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                            'options' => array(
     *                                                'default' => 3, // value to return if the filter fails
     *                                                // other options here
     *                                                'min_range' => 0
     *                                            ),
     *                                            'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                            array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                            // Expected format: Surname, GivenNames
     *                                            if (strpos($value, ", ") === false) return false;
     *                                            list($surname, $givennames) = explode(", ", $value, 2);
     *                                            $empty = (empty($surname) || empty($givennames));
     *                                            $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                            if ($empty || $notstrings) {
     *                                                return false;
     *                                            } else {
     *                                                return $value;
     *                                            }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // The native "filter_var()" only accepts array|int for its options argument, so an
        // explicit null must not be forwarded. An omitted argument defaults to null as well,
        // hence this one check handles both cases (same guard as in "filter_input()").
        if ($options === null) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2146
2147
    /**
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables and optionally filters them.
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'user@example.com'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'user@example.com']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<mixed>   $data       <p>
     *                                   An array with string keys containing the data to filter.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a
     *                                   filter type, or an
     *                                   array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are filter
     *                                   which specifies the filter type,
     *                                   flags which specifies any flags that apply to the
     *                                   filter, and options which specifies any options that
     *                                   apply to the filter.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all values
     *                                   in the input array are filtered by this filter.
     *                                   <b>NULL</b> means "use the defaults of the native function".
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   Only forwarded when $definition is given.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set.
     *               </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // The native "filter_var_array()" only accepts array|int as definition, so an explicit
        // null must not be forwarded. An omitted argument defaults to null as well, hence this
        // one check handles both cases (same guard as in "filter_input_array()").
        if ($definition === null) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        return self::filter($a);
    }
2217
2218
    /**
2219
     * Checks whether finfo is available on the server.
2220
     *
2221
     * @psalm-pure
2222
     *
2223
     * @return bool
2224
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
2225
     *
2226
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
2227
     */
2228
    public static function finfo_loaded(): bool
    {
        // "finfo" counts as available as soon as a class with that name exists
        $finfo_available = \class_exists('finfo');

        return $finfo_available;
    }
2232
2233
    /**
2234
     * Returns the first $n characters of the string.
2235
     *
2236
     * @param string $str      <p>The input string.</p>
2237
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
2238
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2239
     *
2240
     * @psalm-pure
2241
     *
2242
     * @return string
2243
     */
2244 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to extract for a non-positive count or an empty input
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to "mb_substr()", every other charset is
        // delegated to the encoding-aware "UTF8::substr()" helper
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2259
2260
    /**
2261
     * Check if the number of Unicode characters isn't greater than the specified integer.
2262
     *
2263
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // false</code>
2264
     *
2265
     * @param string $str      the original string to be checked
2266
     * @param int    $box_size the size in number of chars to be checked against string
2267
     *
2268
     * @psalm-pure
2269
     *
2270
     * @return bool
2271
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
2272
     */
2273 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // length as reported by "UTF8::strlen()", forced to int
        $char_count = (int) self::strlen($str);

        return $box_size >= $char_count;
    }
2277
2278
    /**
2279
     * Try to fix simple broken UTF-8 strings.
2280
     *
2281
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
2282
     *
2283
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
2284
     *
2285
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
2286
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
2287
     * See: http://en.wikipedia.org/wiki/Windows-1252
2288
     *
2289
     * @param string $str <p>The input string</p>
2290
     *
2291
     * @psalm-pure
2292
     *
2293
     * @return string
2294
     */
2295 46
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * Search list for "str_replace()", filled on the first call and reused afterwards.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $search_cache = null;

        /**
         * Replacement list for "str_replace()", filled together with the search list.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $replace_cache = null;

        if ($search_cache === null) {
            // lazy-load the "utf8_fix" data set into the class-level property
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            // the map itself provides the replacements, its keys the search strings
            $replace_cache = self::$BROKEN_UTF8_FIX;
            $search_cache = \array_keys($replace_cache ?: []);
        }

        \assert(\is_array($replace_cache));

        return \str_replace($search_cache, $replace_cache, $str);
    }
2328
2329
    /**
2330
     * Fix a double (or multiple) encoded UTF8 string.
2331
     *
2332
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
2333
     *
2334
     * @param string|string[] $str you can use a string or an array of strings
2335
     *
2336
     * @psalm-pure
2337
     *
2338
     * @return string|string[]
2339
     *                         <p>Will return the fixed input-"array" or
2340
     *                         the fixed input-"string".</p>
2341
     *
2342
     * @template TFixUtf8
2343
     * @phpstan-param TFixUtf8 $str
2344
     * @phpstan-return TFixUtf8
2345
     */
2346 2
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // repair every element recursively, the keys stay untouched
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $str = (string) $str;

        // decode + re-encode over and over until the string stops changing
        $previous = '';
        while ($previous !== $str) {
            $previous = $str;

            $decoded = self::utf8_decode($str, true);

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
2377
2378
    /**
2379
     * Get the text direction ('RTL' or 'LTR') of a specific character.
2380
     *
2381
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
2382
     *
2383
     * @param string $char
2384
     *
2385
     * @psalm-pure
2386
     *
2387
     * @return string
2388
     *                <p>'RTL' or 'LTR'.</p>
2389
     */
2390 2
    public static function getCharDirection(string $char): string
    {
        // Preferred path: ask "IntlChar" when the support flag says it is usable.
        if (self::$SUPPORT['intlChar'] === true) {
            $tmp_return = \IntlChar::charDirection($char);

            // from "IntlChar"-Class
            // (numeric direction codes, grouped here into the two values this method returns)
            $char_direction = [
                'RTL' => [1, 13, 14, 15, 21],
                'LTR' => [0, 11, 12, 20],
            ];

            if (\in_array($tmp_return, $char_direction['LTR'], true)) {
                return 'LTR';
            }

            if (\in_array($tmp_return, $char_direction['RTL'], true)) {
                return 'RTL';
            }

            // any other code is in neither list: fall through to the manual table below
        }

        // Fallback path: classify by the numeric value from "chr_to_decimal()"
        // (presumably the Unicode code point of $char - confirm against that helper).
        $c = static::chr_to_decimal($char);

        // everything outside of 0x5be..0x10b7f is reported as LTR right away
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // zone 1: 0x5be..0x85e - single values and ranges that are reported as RTL
        if ($c <= 0x85e) {
            if ($c === 0x5be ||
                $c === 0x5c0 ||
                $c === 0x5c3 ||
                $c === 0x5c6 ||
                ($c >= 0x5d0 && $c <= 0x5ea) ||
                ($c >= 0x5f0 && $c <= 0x5f4) ||
                $c === 0x608 ||
                $c === 0x60b ||
                $c === 0x60d ||
                $c === 0x61b ||
                ($c >= 0x61e && $c <= 0x64a) ||
                ($c >= 0x66d && $c <= 0x66f) ||
                ($c >= 0x671 && $c <= 0x6d5) ||
                ($c >= 0x6e5 && $c <= 0x6e6) ||
                ($c >= 0x6ee && $c <= 0x6ef) ||
                ($c >= 0x6fa && $c <= 0x70d) ||
                $c === 0x710 ||
                ($c >= 0x712 && $c <= 0x72f) ||
                ($c >= 0x74d && $c <= 0x7a5) ||
                $c === 0x7b1 ||
                ($c >= 0x7c0 && $c <= 0x7ea) ||
                ($c >= 0x7f4 && $c <= 0x7f5) ||
                $c === 0x7fa ||
                ($c >= 0x800 && $c <= 0x815) ||
                $c === 0x81a ||
                $c === 0x824 ||
                $c === 0x828 ||
                ($c >= 0x830 && $c <= 0x83e) ||
                ($c >= 0x840 && $c <= 0x858) ||
                $c === 0x85e
            ) {
                return 'RTL';
            }
        // zone 2: the single value 0x200f (U+200F, the RIGHT-TO-LEFT MARK)
        } elseif ($c === 0x200f) {
            return 'RTL';
        // zone 3: 0xfb1d and above (the upper bound 0x10b7f was already enforced)
        } elseif ($c >= 0xfb1d) {
            if ($c === 0xfb1d ||
                ($c >= 0xfb1f && $c <= 0xfb28) ||
                ($c >= 0xfb2a && $c <= 0xfb36) ||
                ($c >= 0xfb38 && $c <= 0xfb3c) ||
                $c === 0xfb3e ||
                ($c >= 0xfb40 && $c <= 0xfb41) ||
                ($c >= 0xfb43 && $c <= 0xfb44) ||
                ($c >= 0xfb46 && $c <= 0xfbc1) ||
                ($c >= 0xfbd3 && $c <= 0xfd3d) ||
                ($c >= 0xfd50 && $c <= 0xfd8f) ||
                ($c >= 0xfd92 && $c <= 0xfdc7) ||
                ($c >= 0xfdf0 && $c <= 0xfdfc) ||
                ($c >= 0xfe70 && $c <= 0xfe74) ||
                ($c >= 0xfe76 && $c <= 0xfefc) ||
                ($c >= 0x10800 && $c <= 0x10805) ||
                $c === 0x10808 ||
                ($c >= 0x1080a && $c <= 0x10835) ||
                ($c >= 0x10837 && $c <= 0x10838) ||
                $c === 0x1083c ||
                ($c >= 0x1083f && $c <= 0x10855) ||
                ($c >= 0x10857 && $c <= 0x1085f) ||
                ($c >= 0x10900 && $c <= 0x1091b) ||
                ($c >= 0x10920 && $c <= 0x10939) ||
                $c === 0x1093f ||
                $c === 0x10a00 ||
                ($c >= 0x10a10 && $c <= 0x10a13) ||
                ($c >= 0x10a15 && $c <= 0x10a17) ||
                ($c >= 0x10a19 && $c <= 0x10a33) ||
                ($c >= 0x10a40 && $c <= 0x10a47) ||
                ($c >= 0x10a50 && $c <= 0x10a58) ||
                ($c >= 0x10a60 && $c <= 0x10a7f) ||
                ($c >= 0x10b00 && $c <= 0x10b35) ||
                ($c >= 0x10b40 && $c <= 0x10b55) ||
                ($c >= 0x10b58 && $c <= 0x10b72) ||
                ($c >= 0x10b78)
            ) {
                return 'RTL';
            }
        }

        // inside the overall range, but not matched by any RTL entry above
        return 'LTR';
    }
2495
2496
    /**
2497
     * Check for php-support.
2498
     *
2499
     * @param string|null $key
2500
     *
2501
     * @psalm-pure
2502
     *
2503
     * @return mixed
2504
     *               Return the full support-"array", if $key === null<br>
2505
     *               return bool-value, if $key is used and available<br>
2506
     *               otherwise return <strong>null</strong>
2507
     */
2508 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // lazy-load the transliterator list the first time a single key is requested
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            // unknown keys yield null
            return self::$SUPPORT[$key] ?? null;
        }

        // no key given: hand back the complete support array
        return self::$SUPPORT;
    }
2522
2523
    /**
2524
     * Warning: this method only works for some file-types (png, jpg)
2525
     *          if you need more supported types, please use e.g. "finfo"
2526
     *
2527
     * @param string $str
2528
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
2529
     *
2530
     * @psalm-pure
2531
     *
2532
     * @return null[]|string[]
2533
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
2534
     *
2535
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
2536
     */
2537 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        // only the first two bytes are inspected
        /** @var false|string $magic_bytes - needed for PhpStan (stubs error) */
        $magic_bytes = \substr($str, 0, 2);
        if ($magic_bytes === false || \strlen($magic_bytes) !== 2) {
            return $fallback;
        }

        // two unsigned bytes, exposed as "chars1" and "chars2"
        $bytes = \unpack('C2chars', $magic_bytes);
        if ($bytes === false) {
            return $fallback;
        }

        // the decimal values of both bytes are glued together, e.g. 255 + 216 => 255216
        $type_code = (int) ($bytes['chars1'] . $bytes['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_types[$type_code] ?? $fallback;
    }
2599
2600
    /**
2601
     * @param int    $length         <p>Length of the random string.</p>
2602
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2603
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2604
     *
2605
     * @return string
2606
     */
2607 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // decided BEFORE the encoding gets normalized: only a literal 'UTF-8'
        // takes the plain "mb_*" route
        $use_mb_functions = $encoding === 'UTF-8';

        if ($use_mb_functions) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        // without any candidate character there is nothing to pick from
        if ($pool_size === 0) {
            return '';
        }

        //
        // add random chars
        //

        $result = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $offset = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                // "random_int()" failed: fall back to "mt_rand()"
                $offset = \mt_rand(0, $pool_size - 1);
            }

            if ($use_mb_functions) {
                $char = \mb_substr($possible_chars, $offset, 1);
            } else {
                $char = self::substr($possible_chars, $offset, 1, $encoding);
            }

            // a failed extraction does not count, the slot is simply drawn again
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2662
2663
    /**
2664
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
2665
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2666
     *
2667
     * @return string
2668
     */
2669 1
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_part = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            // "random_int()" failed: fall back to "mt_rand()"
            $random_part = \mt_rand(0, \mt_getrandmax());
        }

        // random number + session id + remote / server address + caller supplied entropy
        $seed = \implode(
            '',
            [
                $random_part,
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $extra_entropy,
            ]
        );

        // the seed is used as prefix for "uniqid()" (with more entropy enabled)
        $unique_string = \uniqid($seed, true);

        if (!$use_md5) {
            return $unique_string;
        }

        return \md5($unique_string . $seed);
    }
2691
2692
    /**
2693
     * Returns true if the string contains a lower case char, false otherwise.
2694
     *
2695
     * @param string $str <p>The input string.</p>
2696
     *
2697
     * @psalm-pure
2698
     *
2699
     * @return bool
2700
     *              <p>Whether or not the string contains a lower case character.</p>
2701
     */
2702 47
    public static function has_lowercase(string $str): bool
    {
        // anything, followed by at least one lower case character
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: use the pattern helper of this class
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2710
2711
    /**
2712
     * Returns true if the string contains whitespace, false otherwise.
2713
     *
2714
     * @param string $str <p>The input string.</p>
2715
     *
2716
     * @psalm-pure
2717
     *
2718
     * @return bool
2719
     *              <p>Whether or not the string contains whitespace.</p>
2720
     */
2721 11
    public static function has_whitespace(string $str): bool
    {
        // anything, followed by at least one whitespace character
        $pattern = '.*[[:space:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: use the pattern helper of this class
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2729
2730
    /**
2731
     * Returns true if the string contains an upper case char, false otherwise.
2732
     *
2733
     * @param string $str <p>The input string.</p>
2734
     *
2735
     * @psalm-pure
2736
     *
2737
     * @return bool
2738
     *              <p>Whether or not the string contains an upper case character.</p>
2739
     */
2740 12
    public static function has_uppercase(string $str): bool
    {
        // anything, followed by at least one upper case character
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: use the pattern helper of this class
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2748
2749
    /**
2750
     * Converts a hexadecimal value into a UTF-8 character.
2751
     *
2752
     * INFO: opposite to UTF8::chr_to_hex()
2753
     *
2754
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
2755
     *
2756
     * @param string $hexdec <p>The hexadecimal value.</p>
2757
     *
2758
     * @psalm-pure
2759
     *
2760
     * @return false|string one single UTF-8 character
2761
     */
2762 4
    public static function hex_to_chr(string $hexdec)
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        // the numeric value is turned into a character by "UTF8::decimal_to_chr()"
        return self::decimal_to_chr($code_point);
    }
2767
2768
    /**
2769
     * Converts hexadecimal U+xxxx code point representation to integer.
2770
     *
2771
     * INFO: opposite to UTF8::int_to_hex()
2772
     *
2773
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
2774
     *
2775
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
2776
     *
2777
     * @psalm-pure
2778
     *
2779
     * @return false|int
2780
     *                   <p>The code point, or false on failure.</p>
2781
     */
2782 2
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Optional "\u" or "U+" prefix, followed by 4 to 6 hexadecimal digits.
        // Only real hex digits are accepted: with a wider class (a-z) an input
        // like "zzzz" would pass and "intval()" would silently report 0 for it
        // instead of signalling the failure with false.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2797
2798
    /**
2799
     * Converts a UTF-8 string to a series of HTML numbered entities.
2800
     *
2801
     * INFO: opposite to UTF8::html_decode()
2802
     *
2803
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
2804
     *
2805
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
2806
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
2807
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
2808
     *
2809
     * @psalm-pure
2810
     *
2811
     * @return string HTML numbered entities
2812
     */
2813 14
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // the conversion starts at 0x80 when ASCII chars are kept, otherwise at 0x00
            $convmap = [$keep_ascii_chars ? 0x80 : 0x00, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                // first attempt for UTF-8: no explicit encoding argument
                /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
                $encoded = \mb_encode_numericentity($str, $convmap);
                if ($encoded !== null && $encoded !== false) {
                    return $encoded;
                }
            }

            // non UTF-8 input, or the attempt above did not produce a string
            /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
            $encoded = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($encoded !== null && $encoded !== false) {
                return $encoded;
            }
        }

        //
        // fallback via vanilla php
        //

        $encode_single_char = static function (string $chr) use ($keep_ascii_chars, $encoding): string {
            return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
        };

        return \implode('', \array_map($encode_single_char, self::str_split($str)));
    }
2870
2871
    /**
2872
     * UTF-8 version of html_entity_decode()
2873
     *
2874
     * The reason we are not using html_entity_decode() by itself is because
2875
     * while it is not technically correct to leave out the semicolon
2876
     * at the end of an entity most browsers will still interpret the entity
2877
     * correctly. html_entity_decode() does not convert entities without
2878
     * semicolons, so we are left with our own little solution here. Bummer.
2879
     *
2880
     * Convert all HTML entities to their applicable characters.
2881
     *
2882
     * INFO: opposite to UTF8::html_encode()
2883
     *
2884
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2885
     *
2886
     * @see http://php.net/manual/en/function.html-entity-decode.php
2887
     *
2888
     * @param string   $str      <p>
2889
     *                           The input string.
2890
     *                           </p>
2891
     * @param int|null $flags    [optional] <p>
2892
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2893
     *                           and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2894
     *                           <table>
2895
     *                           Available <i>flags</i> constants
2896
     *                           <tr valign="top">
2897
     *                           <td>Constant Name</td>
2898
     *                           <td>Description</td>
2899
     *                           </tr>
2900
     *                           <tr valign="top">
2901
     *                           <td><b>ENT_COMPAT</b></td>
2902
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2903
     *                           </tr>
2904
     *                           <tr valign="top">
2905
     *                           <td><b>ENT_QUOTES</b></td>
2906
     *                           <td>Will convert both double and single quotes.</td>
2907
     *                           </tr>
2908
     *                           <tr valign="top">
2909
     *                           <td><b>ENT_NOQUOTES</b></td>
2910
     *                           <td>Will leave both double and single quotes unconverted.</td>
2911
     *                           </tr>
2912
     *                           <tr valign="top">
2913
     *                           <td><b>ENT_HTML401</b></td>
2914
     *                           <td>
2915
     *                           Handle code as HTML 4.01.
2916
     *                           </td>
2917
     *                           </tr>
2918
     *                           <tr valign="top">
2919
     *                           <td><b>ENT_XML1</b></td>
2920
     *                           <td>
2921
     *                           Handle code as XML 1.
2922
     *                           </td>
2923
     *                           </tr>
2924
     *                           <tr valign="top">
2925
     *                           <td><b>ENT_XHTML</b></td>
2926
     *                           <td>
2927
     *                           Handle code as XHTML.
2928
     *                           </td>
2929
     *                           </tr>
2930
     *                           <tr valign="top">
2931
     *                           <td><b>ENT_HTML5</b></td>
2932
     *                           <td>
2933
     *                           Handle code as HTML 5.
2934
     *                           </td>
2935
     *                           </tr>
2936
     *                           </table>
2937
     *                           </p>
2938
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2939
     *
2940
     * @psalm-pure
2941
     *
2942
     * @return string the decoded string
2943
     */
2944 34
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // shorter than four bytes (examples: &; || &x;) or no "&" at all: nothing to decode
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // every other charset than these three triggers a warning when mbstring is missing
        $is_exotic_encoding = !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
        if ($is_exotic_encoding && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // decode again and again until a pass does not change the string anymore
        do {
            $before_pass = $str;

            if (\strpos($str, '&') === false) {
                // no entity left, the string cannot change anymore
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities
                // (entities without the closing ";" get one appended first)
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($before_pass !== $str);

        return $str;
    }
3003
3004
    /**
3005
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
3006
     *
3007
     * @param string $str
3008
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3009
     *
3010
     * @psalm-pure
3011
     *
3012
     * @return string
3013
     */
3014 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // ENT_QUOTES and ENT_SUBSTITUTE are always applied, callers cannot change them
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3022
3023
    /**
     * Remove empty html-tags, i.e. an opening tag directly followed (optionally
     * separated by whitespace only) by a closing tag.
     *
     * e.g.: <pre><tag></tag></pre>
     *
     * INFO: The name of the closing tag is not compared with the name of the opening tag.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without empty tags.</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // opening tag (no slash inside) + optional whitespace + closing tag
        $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($empty_tag_pattern, '', $str);

        // preg_replace() yields null on error, the cast turns that into ''
        return (string) $stripped;
    }
3042
3043
    /**
3044
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3045
     *
3046
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3047
     *
3048
     * @see http://php.net/manual/en/function.htmlentities.php
3049
     *
3050
     * @param string $str           <p>
3051
     *                              The input string.
3052
     *                              </p>
3053
     * @param int    $flags         [optional] <p>
3054
     *                              A bitmask of one or more of the following flags, which specify how to handle
3055
     *                              quotes, invalid code unit sequences and the used document type. The default is
3056
     *                              ENT_COMPAT | ENT_HTML401.
3057
     *                              <table>
3058
     *                              Available <i>flags</i> constants
3059
     *                              <tr valign="top">
3060
     *                              <td>Constant Name</td>
3061
     *                              <td>Description</td>
3062
     *                              </tr>
3063
     *                              <tr valign="top">
3064
     *                              <td><b>ENT_COMPAT</b></td>
3065
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3066
     *                              </tr>
3067
     *                              <tr valign="top">
3068
     *                              <td><b>ENT_QUOTES</b></td>
3069
     *                              <td>Will convert both double and single quotes.</td>
3070
     *                              </tr>
3071
     *                              <tr valign="top">
3072
     *                              <td><b>ENT_NOQUOTES</b></td>
3073
     *                              <td>Will leave both double and single quotes unconverted.</td>
3074
     *                              </tr>
3075
     *                              <tr valign="top">
3076
     *                              <td><b>ENT_IGNORE</b></td>
3077
     *                              <td>
3078
     *                              Silently discard invalid code unit sequences instead of returning
3079
     *                              an empty string. Using this flag is discouraged as it
3080
     *                              may have security implications.
3081
     *                              </td>
3082
     *                              </tr>
3083
     *                              <tr valign="top">
3084
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3085
     *                              <td>
3086
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3087
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3088
     *                              string.
3089
     *                              </td>
3090
     *                              </tr>
3091
     *                              <tr valign="top">
3092
     *                              <td><b>ENT_DISALLOWED</b></td>
3093
     *                              <td>
3094
     *                              Replace invalid code points for the given document type with a
3095
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3096
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3097
     *                              instance, to ensure the well-formedness of XML documents with
3098
     *                              embedded external content.
3099
     *                              </td>
3100
     *                              </tr>
3101
     *                              <tr valign="top">
3102
     *                              <td><b>ENT_HTML401</b></td>
3103
     *                              <td>
3104
     *                              Handle code as HTML 4.01.
3105
     *                              </td>
3106
     *                              </tr>
3107
     *                              <tr valign="top">
3108
     *                              <td><b>ENT_XML1</b></td>
3109
     *                              <td>
3110
     *                              Handle code as XML 1.
3111
     *                              </td>
3112
     *                              </tr>
3113
     *                              <tr valign="top">
3114
     *                              <td><b>ENT_XHTML</b></td>
3115
     *                              <td>
3116
     *                              Handle code as XHTML.
3117
     *                              </td>
3118
     *                              </tr>
3119
     *                              <tr valign="top">
3120
     *                              <td><b>ENT_HTML5</b></td>
3121
     *                              <td>
3122
     *                              Handle code as HTML 5.
3123
     *                              </td>
3124
     *                              </tr>
3125
     *                              </table>
3126
     *                              </p>
3127
     * @param string $encoding      [optional] <p>
3128
     *                              Like <b>htmlspecialchars</b>,
3129
     *                              <b>htmlentities</b> takes an optional third argument
3130
     *                              <i>encoding</i> which defines encoding used in
3131
     *                              conversion.
3132
     *                              Although this argument is technically optional, you are highly
3133
     *                              encouraged to specify the correct value for your code.
3134
     *                              </p>
3135
     * @param bool   $double_encode [optional] <p>
3136
     *                              When <i>double_encode</i> is turned off PHP will not
3137
     *                              encode existing html entities. The default is to convert everything.
3138
     *                              </p>
3139
     *
3140
     * @psalm-pure
3141
     *
3142
     * @return string
3143
     *                <p>
3144
     *                The encoded string.
3145
     *                <br><br>
3146
     *                If the input <i>string</i> contains an invalid code unit
3147
     *                sequence within the given <i>encoding</i> an empty string
3148
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3149
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3150
     *                </p>
3151
     */
3152 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given, every other value is normalized first
        $encoding_is_known = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$encoding_is_known) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * The native htmlentities() does not convert a backslash into its html entity,
         * so every backslash is replaced by its html entity equivalent here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // final pass through "UTF8::html_encode()" with the same encoding
        return self::html_encode($encoded, true, $encoding);
    }
3181
3182
    /**
3183
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3184
     *
3185
     * INFO: Take a look at "UTF8::htmlentities()"
3186
     *
3187
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3188
     *
3189
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3190
     *
3191
     * @param string $str           <p>
3192
     *                              The string being converted.
3193
     *                              </p>
3194
     * @param int    $flags         [optional] <p>
3195
     *                              A bitmask of one or more of the following flags, which specify how to handle
3196
     *                              quotes, invalid code unit sequences and the used document type. The default is
3197
     *                              ENT_COMPAT | ENT_HTML401.
3198
     *                              <table>
3199
     *                              Available <i>flags</i> constants
3200
     *                              <tr valign="top">
3201
     *                              <td>Constant Name</td>
3202
     *                              <td>Description</td>
3203
     *                              </tr>
3204
     *                              <tr valign="top">
3205
     *                              <td><b>ENT_COMPAT</b></td>
3206
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3207
     *                              </tr>
3208
     *                              <tr valign="top">
3209
     *                              <td><b>ENT_QUOTES</b></td>
3210
     *                              <td>Will convert both double and single quotes.</td>
3211
     *                              </tr>
3212
     *                              <tr valign="top">
3213
     *                              <td><b>ENT_NOQUOTES</b></td>
3214
     *                              <td>Will leave both double and single quotes unconverted.</td>
3215
     *                              </tr>
3216
     *                              <tr valign="top">
3217
     *                              <td><b>ENT_IGNORE</b></td>
3218
     *                              <td>
3219
     *                              Silently discard invalid code unit sequences instead of returning
3220
     *                              an empty string. Using this flag is discouraged as it
3221
     *                              may have security implications.
3222
     *                              </td>
3223
     *                              </tr>
3224
     *                              <tr valign="top">
3225
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3226
     *                              <td>
3227
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3228
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3229
     *                              string.
3230
     *                              </td>
3231
     *                              </tr>
3232
     *                              <tr valign="top">
3233
     *                              <td><b>ENT_DISALLOWED</b></td>
3234
     *                              <td>
3235
     *                              Replace invalid code points for the given document type with a
3236
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3237
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3238
     *                              instance, to ensure the well-formedness of XML documents with
3239
     *                              embedded external content.
3240
     *                              </td>
3241
     *                              </tr>
3242
     *                              <tr valign="top">
3243
     *                              <td><b>ENT_HTML401</b></td>
3244
     *                              <td>
3245
     *                              Handle code as HTML 4.01.
3246
     *                              </td>
3247
     *                              </tr>
3248
     *                              <tr valign="top">
3249
     *                              <td><b>ENT_XML1</b></td>
3250
     *                              <td>
3251
     *                              Handle code as XML 1.
3252
     *                              </td>
3253
     *                              </tr>
3254
     *                              <tr valign="top">
3255
     *                              <td><b>ENT_XHTML</b></td>
3256
     *                              <td>
3257
     *                              Handle code as XHTML.
3258
     *                              </td>
3259
     *                              </tr>
3260
     *                              <tr valign="top">
3261
     *                              <td><b>ENT_HTML5</b></td>
3262
     *                              <td>
3263
     *                              Handle code as HTML 5.
3264
     *                              </td>
3265
     *                              </tr>
3266
     *                              </table>
3267
     *                              </p>
3268
     * @param string $encoding      [optional] <p>
3269
     *                              Defines encoding used in conversion.
3270
     *                              </p>
3271
     *                              <p>
3272
     *                              For the purposes of this function, the encodings
3273
     *                              ISO-8859-1, ISO-8859-15,
3274
     *                              UTF-8, cp866,
3275
     *                              cp1251, cp1252, and
3276
     *                              KOI8-R are effectively equivalent, provided the
3277
     *                              <i>string</i> itself is valid for the encoding, as
3278
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3279
     *                              the same positions in all of these encodings.
3280
     *                              </p>
3281
     * @param bool   $double_encode [optional] <p>
3282
     *                              When <i>double_encode</i> is turned off PHP will not
3283
     *                              encode existing html entities, the default is to convert everything.
3284
     *                              </p>
3285
     *
3286
     * @psalm-pure
3287
     *
3288
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
3295
     */
3296 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given, every other value is normalized first
        $encoding_is_known = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$encoding_is_known) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the actual conversion is done by the native function
        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
3313
3314
    /**
     * Checks whether the "iconv" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function iconv_loaded(): bool
    {
        $is_available = \extension_loaded('iconv');

        return $is_available;
    }
3328
3329
    /**
     * Converts an integer to its hexadecimal code point representation (e.g. "U+xxxx").
     *
     * The hexadecimal part is left-padded with zeros to at least four digits.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text that is put in front of the hexadecimal digits.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The prefix followed by the hexadecimal digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        $digits = \dechex($int);

        // pad short values with leading zeros, longer values are kept as they are
        if (\strlen($digits) < 4) {
            $digits = \substr('0000' . $digits, -4);
        }

        return $prefix . $digits;
    }
3351
3352
    /**
     * Checks whether the "IntlChar" class is available.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function intlChar_loaded(): bool
    {
        $is_available = \class_exists('IntlChar');

        return $is_available;
    }
3366
3367
    /**
     * Checks whether the "intl" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function intl_loaded(): bool
    {
        $is_available = \extension_loaded('intl');

        return $is_available;
    }
3381
3382
    /**
     * Returns true if the string contains only alphabetic chars, false otherwise.
     *
     * INFO: The pattern allows zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars.</p>
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // use "mbstring" when it is flagged as supported, the internal matcher otherwise
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3400
3401
    /**
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
     *
     * INFO: The pattern allows zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // fallback: internal matcher, used when "mbstring" is not flagged as supported
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3419
3420
    /**
     * Returns true if the string contains only punctuation chars, false otherwise.
     *
     * The check is delegated to "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only punctuation chars.</p>
     */
    public static function is_punctuation(string $str): bool
    {
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3434
3435
    /**
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
     *
     * The string counts as printable when "UTF8::remove_invisible_characters()" returns it unchanged.
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
     */
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        $cleaned = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $cleaned === $str;
    }
3450
3451
    /**
     * Checks if a string is 7 bit ASCII (delegates to "ASCII::is_ascii()").
     *
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        $only_ascii = ASCII::is_ascii($str);

        return $only_ascii;
    }
3470
3471
    /**
     * Returns true if the string is base64 encoded, false otherwise.
     *
     * The input is decoded in strict mode and encoded again; it is accepted only
     * if the round trip reproduces the input exactly.
     *
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
     *
     * @param string|null $str                   <p>The input string, non-string values are never valid.</p>
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is base64 encoded.</p>
     */
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round trip: the re-encoded data must be identical to the input
        return \base64_encode($decoded) === $str;
    }
3502
3503
    /**
     * Check if the input is binary... (is look like a hack).
     *
     * The checks run in this order:
     * 1. an empty input is never binary
     * 2. an input that consists only of "0" and "1" is binary
     * 3. an input for which "UTF8::get_file_type()" reports the type "binary" is binary
     * 4. non-strict mode: binary if more than 25% of the bytes are NUL bytes
     * 5. strict mode: binary if "finfo" reports the mime encoding "binary"
     *
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
     *
     * @param int|string $input  <p>The input, it is cast to string first.</p>
     * @param bool       $strict [optional] <p>Use "finfo" instead of the NUL byte heuristic.</p>
     *
     * @throws \RuntimeException if $strict is used and "finfo" is not flagged as supported
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // binary notation, e.g. "0101"
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        if (!$strict) {
            // heuristic: share of NUL bytes in the whole input
            $byte_count = \strlen($input);
            $null_byte_count = \substr_count($input, "\x0", 0, $byte_count);

            return ($null_byte_count / $byte_count) > 0.25;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $finfo_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $finfo_encoding === 'binary';
    }
3555
3556
    /**
     * Check if the file is binary.
     *
     * Only the first 512 bytes of the file are read and handed to
     * "UTF8::is_binary()" in strict mode. A file that cannot be opened or
     * read, or that is empty, is reported as not binary.
     *
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
     *
     * @param string $file <p>The path of the file.</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === false || $head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
3582
3583
    /**
     * Returns true if the string contains only whitespace chars, false otherwise.
     *
     * INFO: The pattern allows zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // use "mbstring" when it is flagged as supported, the internal matcher otherwise
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3601
3602
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The input is compared strictly (===) against every key of self::$BOM,
     * so the whole input must be exactly one of those keys.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // Iterate by value: this is a read-only lookup, a by-reference loop over the
        // shared static array would only turn its elements into references.
        /** @noinspection PhpUnusedLocalVariableInspection */
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if ($str === $bom_string) {
                return true;
            }
        }

        return false;
    }
3627
3628
    /**
     * Determine whether the given value is considered to be empty.
     *
     * The parameter always exists inside of this method, so the result only
     * depends on whether the value equals FALSE when converted to bool.
     *
     * @param array|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is empty().</p>
     */
    public static function is_empty($str): bool
    {
        // for an existing variable empty($x) is the same as !$x
        return !$str;
    }
3645
3646
    /**
     * Returns true if the string contains only hexadecimal chars, false otherwise.
     *
     * INFO: The pattern allows zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // fallback: internal matcher, used when "mbstring" is not flagged as supported
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3664
3665
    /**
     * Check if the string contains any HTML tags.
     *
     * The string is passed through "UTF8::emoji_encode()" first and is then
     * searched for the first opening, closing or self-closing tag.
     *
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        // tag name, optional attributes (quoted, unquoted or without value), optional "/"
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        $found = [];
        \preg_match($tag_pattern, $encoded, $found);

        return $found !== [];
    }
3692
3693
    /**
     * Check if $url is a correct url.
     *
     * Only urls starting with "http://" or "https://" (case-insensitive) are accepted.
     * The url is valid if it matches the internal pattern for (internationalized)
     * domain names or if it passes FILTER_VALIDATE_URL.
     *
     * @param string $url                <p>The url to check.</p>
     * @param bool   $disallow_localhost [optional] <p>
     *                                   Reject urls that start with "localhost", "127.0.0.1"
     *                                   or the IPv6 loopback address ("[::1]"), as well as urls
     *                                   that contain ".localhost".
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // IPv6 literals are written in square brackets inside of a url (RFC 3986),
                    // without these entries the loopback address is accepted by the fallback below
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        if (\preg_match($regex, $url)) {
            return true;
        }

        // fallback: let the native url validation decide
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3744
3745
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * The string is decoded via "UTF8::json_decode()"; a null result is only accepted
     * if the input itself is "null" (compared case-insensitively), and the final
     * answer depends on json_last_error().
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
     *                                                       results.</p>
     *
     * @throws \RuntimeException if "json" is not flagged as supported
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // null is only a legit result for the JSON literal "null" itself
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        if ($only_array_or_object_results_are_valid) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        return \json_last_error() === \JSON_ERROR_NONE;
    }
3784
3785
    /**
     * Checks whether the string consists of lowercase characters only.
     *
     * The string is matched against the pattern "^[[:lower:]]*$", so an empty string matches as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $lowercase_only_pattern = '^[[:lower:]]*$';

        // without mbstring the generic pattern helper does the matching
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $lowercase_only_pattern);
        }

        return \mb_ereg_match($lowercase_only_pattern, $str);
    }
3801
3802
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check is done by trying to unserialize the input. Class instantiation is disabled for that attempt
     * ("allowed_classes" => false), so serialized objects are only recognised, never constructed.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" unserializes to the boolean false, which is also what unserialize() returns on failure,
        // so it has to be recognised up front.
        if ($str === 'b:0;') {
            return true;
        }

        // SECURITY: never let arbitrary input instantiate classes; unknown objects are turned into
        // "__PHP_Incomplete_Class" instances, which still compare as "!== false".
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3824
3825
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * The string is matched against the pattern "^[[:upper:]]*$", so an empty string matches as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $uppercase_only_pattern = '^[[:upper:]]*$';

        // without mbstring the generic pattern helper does the matching
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $uppercase_only_pattern);
        }

        return \mb_ereg_match($uppercase_only_pattern, $str);
    }
3844
3845
    /**
     * Check if the string is UTF-16.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * For each byte order the string is converted to UTF-8 and back again; when that round trip is stable,
     * a score is collected for this byte order. The byte order with the higher score wins, equal scores
     * result in <strong>false</strong>.
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>When true, a string that is not reported as binary by
     *                                          is_binary() is rejected right away, unless the string has a BOM.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check: it is skipped when the string carries a BOM
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, filled by the same round-trip test
        $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
        foreach (['UTF-16LE', 'UTF-16BE'] as $byte_order) {
            $test = \mb_convert_encoding($str, 'UTF-8', $byte_order);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $byte_order, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $byte_order);
            if ($test3 !== $test) {
                continue;
            }

            // the character statistic of the input is needed by both passes, so it is built only once
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys are needed; iterate over a plain copy instead of taking references
            // into the temporary result of a function call
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$scores[$byte_order];
                }
            }
        }

        if ($scores['UTF-16BE'] !== $scores['UTF-16LE']) {
            if ($scores['UTF-16LE'] > $scores['UTF-16BE']) {
                return 1;
            }

            return 2;
        }

        return false;
    }
3943
3944
    /**
     * Check if the string is UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * For each byte order the string is converted to UTF-8 and back again; when that round trip is stable,
     * a score is collected for this byte order. The byte order with the higher score wins, equal scores
     * result in <strong>false</strong>.
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>When true, a string that is not reported as binary by
     *                                          is_binary() is rejected right away, unless the string has a BOM.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check: it is skipped when the string carries a BOM
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, filled by the same round-trip test
        $scores = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
        foreach (['UTF-32LE', 'UTF-32BE'] as $byte_order) {
            $test = \mb_convert_encoding($str, 'UTF-8', $byte_order);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $byte_order, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $byte_order);
            if ($test3 !== $test) {
                continue;
            }

            // the character statistic of the input is needed by both passes, so it is built only once
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys are needed; iterate over a plain copy instead of taking references
            // into the temporary result of a function call
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$scores[$byte_order];
                }
            }
        }

        if ($scores['UTF-32BE'] !== $scores['UTF-32LE']) {
            if ($scores['UTF-32LE'] > $scores['UTF-32BE']) {
                return 1;
            }

            return 2;
        }

        return false;
    }
4042
4043
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * Arrays are checked element by element (recursively); the first failing element ends the check.
     * Every other input is cast to string before it is checked.
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // the elements are only read, so iterate by value: no array copy, no dangling reference
            foreach ($str as $value) {
                if (!self::is_utf8($value, $strict)) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4073
4074
    /**
     * Decodes a JSON string.
     *
     * The input is passed through self::filter() first and is then handed over to PHP's native json_decode().
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth; values below 1 are raised to 1.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, passed on unchanged to the native function.
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the "json" support flag is false
     *
     * @return mixed
     *               <p>The result of the native json_decode() call: the decoded value in the appropriate PHP type,
     *               or <b>NULL</b> if the <i>json</i> cannot be decoded or if the encoded data is deeper than the
     *               recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // never hand a depth below 1 to the native function
        $safe_depth = \max(1, $depth);

        return \json_decode($filtered_json, $assoc, $safe_depth, $options);
    }
4131
4132
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through self::filter() first and is then handed over to PHP's native json_encode().
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_UNESCAPED_UNICODE</b>), passed on unchanged
     *                       to the native function.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth; values below 1 are raised to 1.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the "json" support flag is false
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // never hand a depth below 1 to the native function
        $safe_depth = \max(1, $depth);

        return \json_encode($filtered_value, $options, $safe_depth);
    }
4188
4189
    /**
     * Checks whether JSON is available on the server.
     *
     * The check looks for the native "json_decode" function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $json_decode_exists = \function_exists('json_decode');

        return $json_decode_exists;
    }
4203
4204
    /**
     * Makes string's first char lowercase.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * For the encoding "UTF-8" the string is split with the native "mb_" functions; the native lowercase
     * conversion is used as well, as long as neither $lang nor $try_to_keep_the_string_length is set.
     * In every other case self::strtolower() converts the first char.
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            // fast path: no language specific rules and no length restriction requested
            if ($lang === null && !$try_to_keep_the_string_length) {
                return \mb_strtolower($first_char) . $remainder;
            }
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $first_char = (string) self::substr($str, 0, 1, $encoding);
            $remainder = (string) self::substr($str, 1, null, $encoding);
        }

        $lowercased_first_char = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $lowercased_first_char . $remainder;
    }
4266
4267
    /**
     * Lowercase for all words in the string.
     *
     * The string is split via self::str_to_words(); every non-empty chunk of that result is appended to the
     * returned string, either unchanged (when it is listed in $exceptions) or after its first char
     * was lowercased via self::lcfirst().
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // compare against '' explicitly: the string "0" is falsy in PHP, but it is not empty
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // the chunks are only read, so there is no need to iterate by reference
        foreach ($words as $word) {
            // skip empty chunks only; a chunk like "0" is content and must survive
            if ((string) $word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }
4320
4321
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; <strong>null</strong> strips whitespace,
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty char list would produce the invalid character class "^[]+"; the failed replacement
        // would then wipe the whole string. Nothing to strip means: hand the string back untouched.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter interpolation syntax is deprecated since PHP 8.2
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }
4360
4361
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * An array input is joined into one string before the code points are collected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);

        // no code points, no maximum
        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4387
4388
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte lengths of the given chars, 0 when self::chr_size_list() returns an empty list.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $byte_lengths = self::chr_size_list($str);

        return $byte_lengths === [] ? 0 : (int) \max($byte_lengths);
    }
4410
4411
    /**
     * Checks whether mbstring is available on the server.
     *
     * The check asks PHP whether the "mbstring" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $is_extension_loaded = \extension_loaded('mbstring');

        return $is_extension_loaded;
    }
4425
4426
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * An array input is joined into one string before the code points are collected.
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
     */
    public static function min($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);

        // no code points, no minimum
        return $codepoints === []
            ? null
            : self::chr((int) \min($codepoints));
    }
4453
4454
    /**
     * Normalize the name of an encoding.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * Lookup order: a short list of very common names, the per-request result cache,
     * the list of known encodings and finally a table of aliases that is matched
     * against the upper-cased name with all non-alphanumeric characters removed.
     * Names that match nothing are returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return the $fallback
     *                      (an empty string by default) if no usable encoding name was given.</p>
     *
     * @template TNormalizeEncodingFallback
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
     * @phpstan-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // nothing usable was given (note: '' and '0' are both falsy)
        if (!$encoding) {
            return $fallback;
        }

        // fast path: the most frequently used names, matched exactly
        $frequent_names = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($frequent_names[$encoding])) {
            return $frequent_names[$encoding];
        }

        // only a fallback, for non "strict_types" usage (e.g. a bool that was cast to string)
        if (\in_array($encoding, ['1', '0'], true)) {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // already a known name: keep it untouched
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $cache_key = $encoding;
        $encoding = \strtoupper($encoding);

        // alias lookup key: only letters and digits, e.g. "WINDOWS-1252" -> "WINDOWS1252"
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown aliases keep the upper-cased input
        $encoding = $aliases[$alias_key] ?? $encoding;

        // remember the result under the name exactly as it was passed in
        $STATIC_NORMALIZE_ENCODING_CACHE[$cache_key] = $encoding;

        return $encoding;
    }
4619
4620
    /**
     * Standardize line endings ("\r\n", "\r" and "\n") to one replacement, unix-like by default.
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here.<br>An array is handed to "str_replace()" unchanged, so its elements
     *                                  replace "\r\n", "\r" and "\n" (in that order), one pass after the other.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if (\is_array($replacer)) {
            // keep the historic "str_replace()" semantics for array replacers
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // "strtr()" tries the longest key first and never re-scans text it has already
        // replaced, so a replacer that itself contains "\r" or "\n" (e.g. "\r\n") is
        // inserted exactly once per line break.
        return \strtr(
            $str,
            [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ]
        );
    }
4636
4637
    /**
     * Normalize some special characters that are typically produced by MS Word.
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        // the work is delegated to the ASCII helper class
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4653
4654
    /**
     * Normalize the whitespace of a string.
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                          <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
     *                                             bidirectional text chars.</p>
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        // all options are passed through unchanged to the ASCII helper class
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }
4683
4684
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * Results are memoized per character + encoding for the lifetime of the request.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence or empty input</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the key is built from the character as it was passed in, before any conversion
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // lazy-load the static "character => code point" lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $code = \IntlChar::ord($chr);
            // "IntlChar::ord()" signals failure with null; 0 is a valid code point (NUL),
            // so compare strictly instead of relying on truthiness
            if ($code !== null) {
                return $CHAR_CACHE[$cache_key] = $code;
            }
        }

        //
        // fallback via vanilla php
        //

        // decode by hand from (at most) the first four bytes; "unpack" keys start at 1
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $code = $bytes ? $bytes[1] : 0;

        // 4-byte sequence
        if ($code >= 0xF0 && isset($bytes[4])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        }

        // 3-byte sequence
        if ($code >= 0xE0 && isset($bytes[3])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        }

        // 2-byte sequence
        if ($code >= 0xC0 && isset($bytes[2])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xC0) << 6) + $bytes[2] - 0x80);
        }

        // single byte (or 0 for empty input)
        return $CHAR_CACHE[$cache_key] = $code;
    }
4769
4770
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            // the native function has no usable return value, so only the result counts
            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4811
4812
    /**
     * Checks if the "u" pattern modifier, which enables Unicode (UTF-8) support in PCRE, is available.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        // probe with an empty pattern that uses the "u" modifier; errors are silenced on purpose
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe_result = @\preg_match('//u', '');

        return (bool) $probe_result;
    }
4828
4829
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * With $use_ctype, two decimal-digit arguments are used as decimal code points and two
     * hex-digit arguments as hexadecimal code points; anything else is treated as a
     * character whose code point is looked up via UTF8::ord().
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $step is not a positive number
     * @throws \RuntimeException         if $use_ctype is requested but ext-ctype is not available
     *
     * @return string[]
     *                  <p>The characters of the range, or an empty array if one of the
     *                  boundaries is empty or resolves to code point 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // The ctype functions must always receive strings: an int argument is deprecated
        // since PHP 8.1 and small ints are interpreted as ASCII codes instead of digits.
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        $is_digit = false;
        $is_xdigit = false;

        if ($use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string)) {
            $is_digit = true;
            $start = (int) $var1;
        } elseif ($use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        // the end boundary is interpreted the same way as the start boundary
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4922
4923
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass is always done; with $multi_decode the pass is repeated
        // until the string does not change anymore.
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }
4983
4984
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always executed with the "u" (UTF-8) modifier, followed by the given $options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the option string "msr" is reduced to "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty (falsy) delimiter
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5019
5020
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_length = \strlen($str);

        // every known BOM is checked once, in the order of the BOM table
        foreach (self::$BOM as $bom => $bom_length) {
            if (\strncmp($str, $bom, $bom_length) !== 0) {
                continue;
            }

            /** @var false|string $without_bom - needed for PhpStan (stubs error) */
            $without_bom = \substr($str, $bom_length, $remaining_length);
            if ($without_bom === false) {
                return '';
            }

            $remaining_length -= $bom_length;
            $str = (string) $without_bom;
        }

        return $str;
    }
5055
5056
    /**
     * Removes duplicate (directly repeated) occurrences of a string in another string.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_array($what)) {
            // neither a string nor an array: nothing to search for
            return $str;
        }

        foreach ($what as $item) {
            // A callback is used for the replacement so that the needle is inserted
            // literally: as a plain "preg_replace()" replacement string, sequences like
            // "$0", "$1" or "\1" inside the needle would be expanded as back-references.
            $str = (string) \preg_replace_callback(
                '/(?:' . \preg_quote($item, '/') . ')+/u',
                static function () use ($item) {
                    return $item;
                },
                $str
            );
        }

        return $str;
    }
5086
5087
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
     *                               should not be stripped. Default: '' (strip every tag)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $without_tags = \strip_tags($str, $allowable_tags);

        return $without_tags;
    }
5104
5105
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * EXAMPLE: <code>UTF8::remove_html_breaks("a\r\nb<br />c", ' '); // 'a b c'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // - "\r\n" is listed first, so a Windows line break is replaced once (not twice)
        // - "<br\b[^>]*>" matches <br>, <br/>, <br /> and <br ...attributes...> (case-insensitive),
        //   but no other tag that merely starts with "br"
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
5120
5121
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        // all options are passed through unchanged to the ASCII helper class
        $visible_only = ASCII::remove_invisible_characters(
            $str,
            $url_encoded,
            $replacement,
            $keep_basic_control_characters
        );

        return $visible_only;
    }
5158
5159
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_left('κόσμε-abc', 'κόσμε'); // '-abc'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a plain truthiness check would also skip the
        // valid prefix "0". The prefix test is done byte-wise and only looks at the
        // beginning of the string.
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5200
5201
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_right('abc-κόσμε', 'κόσμε'); // 'abc-'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a plain truthiness check would also skip the
        // valid suffix "0". The suffix test itself is done byte-wise.
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5239
5240
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // the case-insensitive variant is handled by the UTF-8 aware helper of this class
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5265
5266
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // the case-insensitive variant is handled by the UTF-8 aware helper of this class
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5291
5292
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character (multi-byte characters are supported).</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            // "mb_substitute_character()" expects a Unicode code point, so the byte value
            // from "\ord()" is only correct for single-byte (ASCII) replacements.
            // Instead, invalid sequences are marked with U+FFFD, which the final
            // "str_replace()" below swaps for the real replacement.
            // An empty replacement simply drops invalid sequences ('none').
            $substitute_character = $replacement_char === '' ? 'none' : 0xFFFD;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
            @\mb_substitute_character($substitute_character);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            // restore the previous (global) substitute character
            \mb_substitute_character($save);
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
5351
5352
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped. <strong>null</strong> strips
     *                           whitespace, an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if ($chars === '') {
            // An empty character list would build the invalid pattern "[]+$";
            // the failed replace would then be cast to an empty string.
            // There is nothing to strip, so hand the input back untouched.
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // mb_ereg_replace() takes a pattern without delimiters, the
            // fallback path additionally quotes the "/" character
            /** @noinspection PregQuoteUsageInspection */
            $chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        if ($use_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5392
5393
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the
     * internal support table, optionally echoes it and always returns it.
     *
     * @param bool $useEcho <p>Also echo the generated HTML. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The generated HTML.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $html = '<pre>';

        // Iterate by value: nothing is modified here, and a by-reference loop
        // variable would stay bound to the last element of the static array.
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5419
5420
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to encode
        if ($char === '') {
            return '';
        }

        // the ASCII check is only evaluated when the caller asked to keep ASCII
        $return_unchanged = $keep_ascii_chars && ASCII::is_ascii($char);

        return $return_unchanged
            ? $char
            : '&#' . self::ord($char, $encoding) . ';';
    }
5453
5454
    /**
     * Replaces each non-overlapping run of $tab_length spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that are collapsed into one tab.
     *                           Values below 1 leave the string unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // str_repeat() throws a ValueError for negative counts on PHP 8 and an
        // empty search string is ignored by str_replace(), so there is nothing
        // to replace for a tab length below one.
        if ($tab_length < 1) {
            return $str;
        }

        return \str_replace(\str_repeat(' ', $tab_length), "\t", $str);
    }
5474
5475
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first character, then drop leading dashes / underscores
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_*" functions are only used when no language specific
        // handling and no length preservation was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-casing step shared by both passes below.
         *
         * @param string $chunk
         * @param bool   $clean <p>Forwarded as "clean UTF-8" flag to self::strtoupper().</p>
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($chunk);
            }

            return \mb_strtoupper($chunk, $encoding);
        };

        // pass 1: remove separator runs and upper-case the character that follows them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                // a separator run at the very end has no following character
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // pass 2: upper-case the character that follows a run of digits
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5568
5569
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the capitalize helper is applied
     * with a space and afterwards with a dash as delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $space_separated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_separated, '-');
    }
5590
5591
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // case-insensitive lookups always go through mbstring
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        // prefer the native function where it exists
        if (\PHP_VERSION_ID >= 80000) {
            /** @phpstan-ignore-next-line - only for PHP8 */
            return \str_contains($haystack, $needle);
        }

        return \strpos($haystack, $needle) !== false;
    }
5621
5622
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles array or an empty needle always
     * yields false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains every needle.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare against the empty string instead of using "!$needle",
            // otherwise the valid needle "0" would be rejected
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            // exactly one lookup per needle: the case-sensitive branch must not
            // fall through into the case-insensitive check
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5659
5660
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped; an empty $haystack or an empty $needles array
     * always yields false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains at least one needle.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare against the empty string instead of using "!$needle",
            // otherwise the valid needle "0" would be skipped
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5703
5704
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is str_delimit() with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
5720
5721
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the regex engine is chosen once, the three steps are the same for both
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $str = \trim($str);

        // step 1: mark every uppercase letter inside a word with a leading dash
        if ($has_mbstring) {
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $str);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $str);
        }

        // step 2: lowercase, via plain mbstring when no special handling was requested
        if ($lang === null && !$try_to_keep_the_string_length && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: collapse runs of dashes, underscores and whitespace into the delimiter
        if ($has_mbstring) {
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5772
5773
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * The checks run in this order: binary data (UTF-32 / UTF-16), ASCII,
     * UTF-8, "mb_detect_encoding()" and finally an "iconv()" round trip.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        // the second argument of is_binary() is only true when the string has no BOM
        $looks_binary = self::is_binary($str, !self::string_has_bom($str));
        if ($looks_binary) {
            $utf32_result = self::is_utf32($str, false);
            if ($utf32_result === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_result === 2) {
                return 'UTF-32BE';
            }

            $utf16_result = self::is_utf16($str, false);
            if ($utf16_result === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_result === 2) {
                return 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidate_encodings = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidate_encodings, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            // lazily load the list of known encodings
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $encoding_tmp) {
            // an encoding is accepted when converting the string to itself leaves it unchanged
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($encoding_tmp, $encoding_tmp . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $encoding_tmp;
            }
        }

        return false;
    }
5903
5904
    /**
     * Check if the string ends with the given substring.
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * An empty $needle always matches; otherwise an empty $haystack never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // compare the last strlen($needle) bytes of the haystack with the needle
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
5936
5937
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty substring matches every string, like in str_ends_with()
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $substring) {
            $substring = (string) $substring;

            // "substr($str, -0)" returns the whole string, so an empty
            // substring has to be handled explicitly; without this the result
            // depended on $str and on the PHP version
            if ($substring === '') {
                return true;
            }

            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
5964
5965
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // byte-wise prefix comparison; an empty substring never counts as "present"
        $already_prefixed = $substring !== ''
                            && \strncmp($str, $substring, \strlen($substring)) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
5988
5989
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // byte-wise suffix comparison; only evaluated when both strings are non-empty
        $already_suffixed = $str !== ''
                            && $substring !== ''
                            && \substr($str, -\strlen($substring)) === $substring;

        if ($already_suffixed) {
            return $str;
        }

        return $str . $substring;
    }
6013
6014
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * Every occurrence of '_id' is removed first, then the remaining
     * underscores become spaces and the result is trimmed.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // same order as the array form of str_replace(): '_id' first, '_' second
        $without_id = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($spaced));
    }
6040
6041
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * An empty $needle always matches; otherwise an empty $haystack never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // take as many trailing bytes as the needle has and compare them case-insensitively
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6068
6069
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // the first matching candidate wins
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6096
6097
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The string is returned unchanged when $index is greater than its length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // plain mbstring calls without an explicit encoding argument
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            /** @noinspection UnnecessaryCastingInspection */
            $head = (string) \mb_substr($str, 0, $index);
            /** @noinspection UnnecessaryCastingInspection */
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        // every other encoding is normalized and handled by the class' own helpers
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
6138
6139
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * The replacement is always inserted literally: "$1", "\1" and similar
     * sequences are NOT treated as regex back-references.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     An empty search string never matches.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements.</p>
     *
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // turn every needle into a quoted, case-insensitive unicode pattern
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // a pattern that can never match: start of string preceded by a character
                $patterns[$key] = '/^(?<=.)$/';
            } else {
                $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // preg_replace() interprets "\" and "$" in the replacement as
        // back-reference syntax; escape them so e.g. "$5" stays "$5" instead
        // of silently expanding to an empty string
        $escape_replacement = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        if (\is_array($replacement)) {
            $replacement = \array_map($escape_replacement, $replacement);
        } else {
            $replacement = $escape_replacement($replacement);
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6206
6207
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * - case-insensitive (ASCII and multibyte characters)
     *
     * EXAMPLE: <code>UTF8::str_ireplace_beginning('ÖZ', 'ö', 'x'); // 'xZ'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The string after the replacement. If $search is empty,
     *                $replacement is appended to $str.
     *                </p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        // Fast path: byte-wise comparison, which only folds ASCII letters.
        $search_bytes = \strlen($search);
        if (\strncasecmp($str, $search, $search_bytes) === 0) {
            return $replacement . \substr($str, $search_bytes);
        }

        // Multibyte path: strncasecmp() does not fold non-ASCII letters
        // (e.g. "Ö" vs. "ö"), so compare the lower-cased character prefix.
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        $prefix = (string) \mb_substr($str, 0, $search_length, 'UTF-8');
        if (\mb_strtolower($prefix, 'UTF-8') === \mb_strtolower($search, 'UTF-8')) {
            return $replacement . \mb_substr($str, $search_length, null, 'UTF-8');
        }

        return $str;
    }
6242
6243
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * - case-insensitive (ASCII and multibyte characters)
     *
     * EXAMPLE: <code>UTF8::str_ireplace_ending('FooBAR', 'bar', 'x'); // 'Foox'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The string after the replacement. If $search is empty,
     *                $replacement is appended to $str. If $search is longer
     *                than $str, $str is returned unchanged.
     *                </p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        // Fast path: byte-wise suffix comparison (folds ASCII letters only).
        // The length guard is required: a $search longer than $str would
        // otherwise produce an offset outside of the string.
        $search_bytes = \strlen($search);
        if (
            $search_bytes <= \strlen($str)
            &&
            \substr_compare($str, $search, -$search_bytes, $search_bytes, true) === 0
        ) {
            return \substr($str, 0, -$search_bytes) . $replacement;
        }

        // Multibyte path: compare the lower-cased character suffix, so that
        // non-ASCII letters (e.g. "Ö" vs. "ö") are matched as well.
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        if ($search_length <= (int) \mb_strlen($str, 'UTF-8')) {
            $suffix = (string) \mb_substr($str, -$search_length, null, 'UTF-8');
            if (\mb_strtolower($suffix, 'UTF-8') === \mb_strtolower($search, 'UTF-8')) {
                return \mb_substr($str, 0, -$search_length, 'UTF-8') . $replacement;
            }
        }

        return $str;
    }
6277
6278
    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              Always true for an empty $needle; otherwise false for an
     *              empty $haystack.
     *              </p>
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // The empty needle check comes first: it wins even for an empty haystack.
        if ($needle === '') {
            return true;
        }

        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
6305
6306
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              Whether or not $str starts with one of the $substrings;
     *              always false for an empty $str or an empty list.
     *              </p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: nothing is modified here, and a by-reference loop
        // variable would stay bound to the last array element after the loop.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6337
6338
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * - case-insensitive
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The part after the separator, or an empty string if $str or
     *                $separator is empty or the separator was not found.
     *                </p>
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used below to measure the
        // separator and to cut the string, so the offset stays consistent.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6377
6378
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * - case-insensitive
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The part after the separator, or an empty string if $str or
     *                $separator is empty or the separator was not found.
     *                </p>
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used below to measure the
        // separator and to cut the string, so the offset stays consistent.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6417
6418
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * - case-insensitive
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The part before the separator, or an empty string if $str or
     *                $separator is empty or the separator was not found.
     *                </p>
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used below to cut the string,
        // so the offset stays consistent.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6449
6450
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * - case-insensitive
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The part before the separator, or an empty string if $str or
     *                $separator is empty or the separator was not found.
     *                </p>
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        // UTF-8 is handled directly via the "mb_*" functions.
        if ($encoding === 'UTF-8') {
            $position = \mb_strripos($str, $separator);

            return $position === false
                ? ''
                : (string) \mb_substr($str, 0, $position);
        }

        $position = self::strripos($str, $separator, 0, $encoding);

        return $position === false
            ? ''
            : (string) self::substr($str, 0, $position, $encoding);
    }
6486
6487
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * The lookup is delegated to self::stristr().
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The found part, or an empty string if $str or $needle is
     *                empty or the lookup failed.
     *                </p>
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        // "false" signals "not found" and is mapped to an empty string.
        return $found === false ? '' : $found;
    }
6525
6526
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * The lookup is delegated to self::strrichr().
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The found part, or an empty string if $str or $needle is
     *                empty or the lookup failed.
     *                </p>
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        // "false" signals "not found" and is mapped to an empty string.
        return $found === false ? '' : $found;
    }
6564
6565
    /**
     * Returns the last $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trailing characters, or an empty string if $str is empty or $n is not positive.</p>
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // A negative start offset counts from the end of the string.
        $start = -$n;

        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, $start, null, $normalized_encoding);
        }

        return (string) \mb_substr($str, $start);
    }
6593
6594
    /**
     * Limit the number of characters in a string.
     *
     * If the string is longer than $length, it is cut so that the kept part
     * plus $str_add_on is $length characters long. If $str_add_on alone is
     * already longer than $length, nothing of $str is kept and only
     * $str_add_on is returned.
     *
     * EXAMPLE: <code>UTF8::str_limit('fòô bàř fòô', 8, '…'); // 'fòô bàř…'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The (maybe shortened) string, or an empty string if $str is empty or $length is not positive.</p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never go below zero: a negative length would make mb_substr()
            // cut from the end and return more than $length characters.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on with the same encoding as the string itself.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6633
6634
    /**
     * Limit the number of characters in a string, cutting at a word boundary (space) where possible.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The (maybe shortened) string, or an empty string if $str is empty or $length is not positive.</p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The limit ends directly on a space: drop that space and stop.
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $str = \mb_substr($str, 0, $length);
        } else {
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // The limit ends directly on a space: drop that space and stop.
            if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = self::substr($str, 0, $length, $encoding);
            if ($str === false) {
                return '' . $str_add_on;
            }
        }

        // Drop the last (possibly incomplete) word of the shortened string.
        $words = \explode(' ', $str, -1);
        $new_str = \implode(' ', $words);

        if ($new_str !== '') {
            return $new_str . $str_add_on;
        }

        // No space inside the limit: cut one character before the limit instead.
        if ($is_utf8) {
            return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
        }

        return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
    }
6700
6701
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The common prefix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // Only the length of the shorter string needs to be compared.
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            for ($pos = 0; $pos < $limit; ++$pos) {
                $char = \mb_substr($str1, $pos, 1);

                // Stop at the first position where the characters differ.
                if ($char === false || $char !== \mb_substr($str2, $pos, 1)) {
                    break;
                }

                $prefix .= $char;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($pos = 0; $pos < $limit; ++$pos) {
            $char = self::substr($str1, $pos, 1, $encoding);

            // Stop at the first position where the characters differ.
            if ($char === false || $char !== self::substr($str2, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
        }

        return $prefix;
    }
6764
6765
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Extract every character exactly once, instead of calling the
        // substr-function again for each cell of the comparison table.
        $str_chars = [];
        $other_chars = [];
        if ($encoding === 'UTF-8') {
            for ($i = 0; $i < $str_length; ++$i) {
                $str_chars[] = \mb_substr($str1, $i, 1);
            }
            for ($j = 0; $j < $other_length; ++$j) {
                $other_chars[] = \mb_substr($str2, $j, 1);
            }
        } else {
            for ($i = 0; $i < $str_length; ++$i) {
                $str_chars[] = self::substr($str1, $i, 1, $encoding);
            }
            for ($j = 0; $j < $other_length; ++$j) {
                $other_chars[] = self::substr($str2, $j, 1, $encoding);
            }
        }

        // $len = length of the best match so far,
        // $end = (1-based) position in $str1 where that match ends.
        $len = 0;
        $end = 0;

        // Each row only depends on the row before it, so two rows are enough.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $str_char = $str_chars[$i - 1];
            $current_row = [0];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    // Extend the common run that ended at the previous characters.
                    $run = $previous_row[$j - 1] + 1;

                    // Strictly greater: on ties the first match is kept.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6855
6856
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The common suffix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            // Only the length of the shorter string needs to be compared.
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Walk backwards from the end of both strings.
            for ($back = 1; $back <= $limit; ++$back) {
                $char = \mb_substr($str1, -$back, 1);

                if ($char === false || $char !== \mb_substr($str2, -$back, 1)) {
                    break;
                }

                $suffix = $char . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        // Walk backwards from the end of both strings.
        for ($back = 1; $back <= $limit; ++$back) {
            $char = self::substr($str1, -$back, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, -$back, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
        }

        return $suffix;
    }
6922
6923
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is given without delimiters and modifiers; it is wrapped
     * into "/…/u" internally. A "/" inside of the pattern may be written
     * escaped ("\/") or unescaped.
     *
     * EXAMPLE: <code>UTF8::str_matches_pattern('a/b', '^a/b$'); // true</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // Escape every "/" that is not escaped already, otherwise it would end
        // the regex (it is used as delimiter below). A "/" counts as unescaped
        // if it is preceded by an even number of backslashes.
        $escaped_pattern = \preg_replace('~(?<!\\\\)((?:\\\\\\\\)*)/~', '$1\\\\/', $pattern);
        if ($escaped_pattern === null) {
            // Escaping failed: fall back to the pattern as given.
            $escaped_pattern = $pattern;
        }

        return (bool) \preg_match('/' . $escaped_pattern . '/u', $str);
    }
6938
6939
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Non-negative offsets are 0-based from the start,
        // negative offsets are 1-based from the end.
        return $offset >= 0
            ? $char_count > $offset
            : $char_count >= \abs($offset);
    }
6964
6965
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string with the same encoding that is used to fetch the
        // character below, otherwise the bounds check may not fit the lookup.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6997
6998
    /**
6999
     * Pad a UTF-8 string to a given length with another string.
7000
     *
7001
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
7002
     *
7003
     * @param string     $str        <p>The input string.</p>
7004
     * @param int        $pad_length <p>The length of return string.</p>
7005
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
7006
     * @param int|string $pad_type   [optional] <p>
7007
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
7008
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
7009
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
7010
     *                               </p>
7011
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
7012
     *
7013
     * @psalm-pure
7014
     *
7015
     * @return string
7016
     *                <p>Returns the padded string.</p>
7017
     */
7018
    public static function str_pad(
7019
        string $str,
7020
        int $pad_length,
7021
        string $pad_string = ' ',
7022
        $pad_type = \STR_PAD_RIGHT,
7023
        string $encoding = 'UTF-8'
7024
    ): string {
7025 41
        if ($pad_length === 0 || $pad_string === '') {
7026 1
            return $str;
7027
        }
7028
7029 41
        if ($pad_type !== (int) $pad_type) {
7030 13
            if ($pad_type === 'left') {
7031 3
                $pad_type = \STR_PAD_LEFT;
7032 10
            } elseif ($pad_type === 'right') {
7033 6
                $pad_type = \STR_PAD_RIGHT;
7034 4
            } elseif ($pad_type === 'both') {
7035 3
                $pad_type = \STR_PAD_BOTH;
7036
            } else {
7037 1
                throw new \InvalidArgumentException(
7038 1
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
7039
                );
7040
            }
7041
        }
7042
7043 40
        if ($encoding === 'UTF-8') {
7044 25
            $str_length = (int) \mb_strlen($str);
7045
7046 25
            if ($pad_length >= $str_length) {
7047
                switch ($pad_type) {
7048 25
                    case \STR_PAD_LEFT:
7049 8
                        $ps_length = (int) \mb_strlen($pad_string);
7050
7051 8
                        $diff = ($pad_length - $str_length);
7052
7053 8
                        $pre = (string) \mb_substr(
7054 8
                            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7055 8
                            0,
7056 8
                            $diff
7057
                        );
7058 8
                        $post = '';
7059
7060 8
                        break;
7061
7062 20
                    case \STR_PAD_BOTH:
7063 14
                        $diff = ($pad_length - $str_length);
7064
7065 14
                        $ps_length_left = (int) \floor($diff / 2);
7066
7067 14
                        $ps_length_right = (int) \ceil($diff / 2);
7068
7069 14
                        $pre = (string) \mb_substr(
7070 14
                            \str_repeat($pad_string, $ps_length_left),
7071 14
                            0,
7072 14
                            $ps_length_left
7073
                        );
7074 14
                        $post = (string) \mb_substr(
7075 14
                            \str_repeat($pad_string, $ps_length_right),
7076 14
                            0,
7077 14
                            $ps_length_right
7078
                        );
7079
7080 14
                        break;
7081
7082 9
                    case \STR_PAD_RIGHT:
7083
                    default:
7084 9
                        $ps_length = (int) \mb_strlen($pad_string);
7085
7086 9
                        $diff = ($pad_length - $str_length);
7087
7088 9
                        $post = (string) \mb_substr(
7089 9
                            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7090 9
                            0,
7091 9
                            $diff
7092
                        );
7093 9
                        $pre = '';
7094
                }
7095
7096 25
                return $pre . $str . $post;
7097
            }
7098
7099 3
            return $str;
7100
        }
7101
7102 15
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
7103
7104 15
        $str_length = (int) self::strlen($str, $encoding);
7105
7106 15
        if ($pad_length >= $str_length) {
7107
            switch ($pad_type) {
7108 14
                case \STR_PAD_LEFT:
7109 5
                    $ps_length = (int) self::strlen($pad_string, $encoding);
7110
7111 5
                    $diff = ($pad_length - $str_length);
7112
7113 5
                    $pre = (string) self::substr(
7114 5
                        \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7115 5
                        0,
7116 5
                        $diff,
7117 5
                        $encoding
7118
                    );
7119 5
                    $post = '';
7120
7121 5
                    break;
7122
7123 9
                case \STR_PAD_BOTH:
7124 3
                    $diff = ($pad_length - $str_length);
7125
7126 3
                    $ps_length_left = (int) \floor($diff / 2);
7127
7128 3
                    $ps_length_right = (int) \ceil($diff / 2);
7129
7130 3
                    $pre = (string) self::substr(
7131 3
                        \str_repeat($pad_string, $ps_length_left),
7132 3
                        0,
7133 3
                        $ps_length_left,
7134 3
                        $encoding
7135
                    );
7136 3
                    $post = (string) self::substr(
7137 3
                        \str_repeat($pad_string, $ps_length_right),
7138 3
                        0,
7139 3
                        $ps_length_right,
7140 3
                        $encoding
7141
                    );
7142
7143 3
                    break;
7144
7145 6
                case \STR_PAD_RIGHT:
7146
                default:
7147 6
                    $ps_length = (int) self::strlen($pad_string, $encoding);
7148
7149 6
                    $diff = ($pad_length - $str_length);
7150
7151 6
                    $post = (string) self::substr(
7152 6
                        \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7153 6
                        0,
7154 6
                        $diff,
7155 6
                        $encoding
7156
                    );
7157 6
                    $pre = '';
7158
            }
7159
7160 14
            return $pre . $str . $post;
7161
        }
7162
7163 1
        return $str;
7164
    }
7165
7166
    /**
     * Pads both sides of the string until it reaches the requested length.
     *
     * Convenience shortcut: forwards to "UTF8::str_pad()" with the pad type
     * fixed to \STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied on both sides.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7194
7195
    /**
     * Pads the beginning of the string until it reaches the requested length.
     *
     * Convenience shortcut: forwards to "UTF8::str_pad()" with the pad type
     * fixed to \STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7223
7224
    /**
     * Pads the end of the string until it reaches the requested length.
     *
     * Convenience shortcut: forwards to "UTF8::str_pad()" with the pad type
     * fixed to \STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7252
7253
    /**
     * Repeat a string.
     *
     * The input is first passed through "UTF8::filter()" and the filtered
     * result is then repeated with the native "\str_repeat()".
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>The string to be repeated.</p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be repeated.
     *                           </p>
     *                           <p>
     *                           Has to be greater than or equal to 0. If it is 0,
     *                           the function returns an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7282
7283
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     * All arguments are handed to the native function unchanged.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for (the needle).
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value for found search values.
     *                                 An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and
     *                                 replaced on (the haystack).
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, the search and replace is
     *                                 performed on every entry and the return value is
     *                                 an array as well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7342
7343
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * The comparison is byte-wise and case-sensitive. Special case: an empty
     * $search never matches a prefix; instead $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The prefix to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement, or $str unchanged if it does not start with $search.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle yields "$str . $replacement"; this also covers an
        // empty $str ('' . $replacement) and an empty $replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_length = \strlen($search);

        // Prefix matches: swap it for the replacement and keep the remainder.
        return \strncmp($str, $search, $prefix_length) === 0
            ? $replacement . \substr($str, $prefix_length)
            : $str;
    }
7381
7382
    /**
     * Replaces $search at the end of the string with $replacement.
     *
     * The comparison is byte-wise and case-sensitive. Special case: an empty
     * $search never matches a suffix; instead $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The suffix to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement, or $str unchanged if it does not end with $search.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle yields "$str . $replacement"; this also covers an
        // empty $str ('' . $replacement) and an empty $replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = \strlen($str);
        $search_length = \strlen($search);

        // A needle longer than the haystack can never be a suffix. Bail out
        // before computing an offset: a negative offset that points before the
        // start of the string makes \strpos() throw a ValueError on PHP 8
        // (and emit a warning on PHP 7).
        if ($search_length > $str_length) {
            return $str;
        }

        $suffix_start = $str_length - $search_length;

        if (\substr($str, $suffix_start) === $search) {
            return \substr($str, 0, $suffix_start) . $replacement;
        }

        return $str;
    }
7419
7420
    /**
     * Replace the first occurrence of "$search" in "$subject" with "$replace".
     *
     * The position is looked up with "UTF8::strpos()" and the replacement is
     * done by "UTF8::substr_replace()", using the length reported by
     * "UTF8::strlen()" for the search term.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The modified string, or $subject unchanged if $search was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_position = self::strpos($subject, $search);

        // nothing to replace
        if ($first_position === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_position, (int) self::strlen($search));
    }
7454
7455
    /**
     * Replace the last occurrence of "$search" in "$subject" with "$replace".
     *
     * The position is looked up with "UTF8::strrpos()" and the replacement is
     * done by "UTF8::substr_replace()", using the length reported by
     * "UTF8::strlen()" for the search term.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The modified string, or $subject unchanged if $search was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_position = self::strrpos($subject, $search);

        // nothing to replace
        if ($last_position === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_position, (int) self::strlen($search));
    }
7488
7489
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: relies on "\shuffle()", which is not suitable for cryptographic purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // The literal default encoding takes the native mbstring shortcut;
        // everything else is normalized and routed through the UTF8 helpers.
        $use_native_mb = $encoding === 'UTF-8';

        if ($use_native_mb) {
            $char_count = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $char_count = (int) self::strlen($str, $encoding);
        }

        // Shuffle the character positions, then rebuild the string in that order.
        $positions = \range(0, $char_count - 1);
        \shuffle($positions);

        $shuffled_str = '';
        foreach ($positions as $position) {
            $char = $use_native_mb
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            // skip positions for which the substring call reported a failure
            if ($char !== false) {
                $shuffled_str .= $char;
            }
        }

        return $shuffled_str;
    }
7536
7537
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * A non-negative $end that is less than or equal to $start yields an
     * empty string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p>
     *                      <p><b>FALSE</b> only if the underlying substring call reports a failure.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // The literal default encoding takes the native mbstring shortcut;
        // everything else is normalized and routed through the UTF8 helpers.
        $use_native_mb = $encoding === 'UTF-8';
        if (!$use_native_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // an end index at or before the start selects nothing
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            $total_length = $use_native_mb
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            // no end: ask for "everything"; negative end: count back from the end
            $length = $end === null
                ? $total_length
                : $total_length + $end - $start;
        } else {
            $length = $end - $start;
        }

        if ($use_native_mb) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7588
7589
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized and "-" becomes "_"; every number
     * character (\p{N}) or uppercase letter (\p{Lu}) gets an underscore in
     * front of it (uppercase letters are lowercased, plain digits are wrapped
     * in underscores on both sides); whitespace runs become "_"; repeated
     * underscores are collapsed; leading / trailing underscores and
     * whitespace are trimmed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // NOTE: inside a character class "|" is a literal pipe, not an
            // alternation. The class therefore lists only the two Unicode
            // properties, so a "|" in the input is no longer treated as a
            // word boundary.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // a character that survives the int round-trip is a plain
                // digit: separate it on both sides
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // the patterns are applied one after another, in this order
        $str = (string) \preg_replace(
            [
                '/\\s+/u',        // turn every run of whitespace into a single "_"
                '/^\\s+|\\s+$/u', // strip leading & trailing whitespace
                '/_+/',           // collapse repeated "_" into one
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7658
7659
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        /** @var int[] $code_points */
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicate values
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        // turn the ordered code points back into a string
        return self::string($code_points);
    }
7690
7691
    /**
     * Split every entry of an array into chunks of Unicode characters.
     *
     * Each entry is handed to "UTF8::str_split()" with the same options;
     * the keys of the input array are kept.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        foreach ($input as $key => $value) {
            // overwrite in place so that the original keys and order are kept
            $input[$key] = self::str_split($value, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        /** @var string[][] $input */
        return $input;
    }
7729
7730
    /**
     * Convert a string to an array of unicode characters.
     *
     * Strategy, in order of preference: the native "mb_str_split()" (if
     * mbstring is supported, allowed and the function exists), then a
     * character-by-character mbstring / PCRE split, and finally a manual
     * byte-level UTF-8 decoder that silently skips malformed sequences.
     * For the non-native strategies the single characters are grouped into
     * chunks afterwards when $length is greater than 1.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element. Values <= 0 yield an empty array.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input.</p>
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback: arrays are delegated to "str_split_array()"
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /** @psalm-suppress InvalidReturnStatement */
            /** @phpstan-ignore-next-line - old code :/ */
            return self::str_split_array(
                $input,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        $input = (string) $input;
        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if ($try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true) {
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $native_chunks = \mb_str_split($input, $length);
                if ($native_chunks !== false) {
                    // the native function already honours $length
                    return $native_chunks;
                }
            }

            $char_count = \mb_strlen($input);
            if ($char_count > 127) {
                // longer input: a single regex pass
                $matches = [];
                \preg_match_all('/./us', $input, $matches);
                $chars = $matches[0] ?? [];
            } else {
                // short input: pick the characters one by one
                $chars = [];
                for ($pos = 0; $pos < $char_count; ++$pos) {
                    $chars[] = \mb_substr($input, $pos, 1);
                }
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $matches = [];
            \preg_match_all('/./us', $input, $matches);
            $chars = $matches[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand

            $chars = [];
            $byte_count = \strlen($input);

            for ($offset = 0; $offset < $byte_count; ++$offset) {
                $lead = $input[$offset];

                // 0xxxxxxx: single-byte (ASCII) character
                if (($lead & "\x80") === "\x00") {
                    $chars[] = $lead;

                    continue;
                }

                // classify the lead byte -> number of continuation bytes expected
                if (($lead & "\xE0") === "\xC0") {
                    $trailing = 1; // 110xxxxx
                } elseif (($lead & "\xF0") === "\xE0") {
                    $trailing = 2; // 1110xxxx
                } elseif (($lead & "\xF8") === "\xF0") {
                    $trailing = 3; // 11110xxx
                } else {
                    // stray continuation byte or invalid lead byte: skip it
                    continue;
                }

                // sequence is cut off by the end of the input: skip the lead byte
                if (!isset($input[$offset + $trailing])) {
                    continue;
                }

                // every continuation byte must look like 10xxxxxx
                $sequence = $lead;
                $is_valid = true;
                for ($k = 1; $k <= $trailing; ++$k) {
                    if (($input[$offset + $k] & "\xC0") !== "\x80") {
                        $is_valid = false;

                        break;
                    }

                    $sequence .= $input[$offset + $k];
                }

                if ($is_valid) {
                    $chars[] = $sequence;

                    // jump over the continuation bytes that were just consumed
                    $offset += $trailing;
                }
            }
        }

        if ($length > 1) {
            // group the single characters into chunks of $length characters
            return \array_map(
                static function (array $group): string {
                    return \implode('', $group);
                },
                \array_chunk($chars, $length)
            );
        }

        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }
7883
7884
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * With mbstring support the pattern is handed to "mb_split()"; otherwise
     * "preg_split()" is used with the same pattern (wrapped in "/" delimiters,
     * with the "u" modifier). In both cases $pattern is treated as a regular
     * expression and at most $limit leading parts are returned.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings; an empty array if $limit is 0 or the split fails.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        // an empty pattern cannot split anything
        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $result = \mb_split($pattern, $str);
            if ($result === false) {
                return [];
            }

            // $limit === 0 was handled above, so this means "$limit > 0":
            // keep only the first $limit parts and drop the rest.
            if ($limit >= 0) {
                return \array_slice($result, 0, $limit);
            }

            return $result;
        }

        // preg_split() puts the unsplit remainder into the last element when
        // a limit is given: ask for one extra element and drop it afterwards.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // $pattern is a regex (same contract as the mb_split() branch), so it
        // must not be passed through preg_quote(). Only "/" characters that are
        // not already escaped are escaped, because "/" is the delimiter here.
        $delimited_pattern = (string) \preg_replace(
            '~(?<!\\\\)((?:\\\\\\\\)*)/~',
            '$1\\\\/',
            $pattern
        );

        $array = \preg_split('/' . $delimited_pattern . '/u', $str, $limit);
        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7953
7954
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is case-sensitive. An empty $needle always matches;
     * an empty $haystack only matches an empty $needle.
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // before PHP 8 there is no native function: compare the leading bytes
        if (\PHP_VERSION_ID < 80000) {
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
7986
7987
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the $substrings.
     *              Always false for an empty $str or an empty list.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: nothing is modified here, so a by-reference loop would only
        // leave a dangling reference behind and force a needless copy of the array.
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
8018
8019
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything behind the first separator, or an empty string if the input or
     *                the separator is empty or the separator was not found.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Fast path: call the "mb_*" functions directly.
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class wrapper (like the sibling "str_substr_*_separator" methods do) instead of
        // calling "mb_substr" with the raw $encoding, so all steps share the same encoding handling.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8060
8061
    /**
     * Returns the part of the string that follows the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything behind the last separator, or an empty string if the input or
     *                the separator is empty or the separator was not found.</p>
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Other encodings go through the class wrappers.
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            $start = $position + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $start, null, $encoding);
        }

        // UTF-8: call the "mb_*" functions directly.
        $position = \mb_strrpos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, $position + (int) \mb_strlen($separator));
    }
8105
8106
    /**
     * Returns the part of the string that precedes the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything in front of the first separator, or an empty string if the input
     *                or the separator is empty or the separator was not found.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Other encodings go through the class wrappers.
            $position = self::strpos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr($str, 0, $position, $encoding);
        }

        // UTF-8: call the "mb_*" functions directly.
        $position = \mb_strpos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
8151
8152
    /**
     * Returns the part of the string that precedes the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything in front of the last separator, or an empty string if the input
     *                or the separator is empty or the separator was not found.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Other encodings go through the class wrappers.
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // The encoding name is normalized only for the final cut (after the search).
            $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $position, $normalizedEncoding);
        }

        // UTF-8: call the "mb_*" functions directly.
        $position = \mb_strrpos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
8196
8197
    /**
     * Cuts the string at the first occurrence of "$needle".
     *
     * For UTF-8 this is "mb_strstr": by default the result starts at the needle (needle included),
     * with "$before_needle" it is the part in front of the needle (needle excluded).
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The requested part, or an empty string if the input or the needle is empty
     *                or the needle was not found.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // UTF-8 uses "mb_strstr" directly, every other encoding goes through the class wrapper.
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8243
8244
    /**
     * Cuts the string at the last occurrence of "$needle".
     *
     * For UTF-8 this is "mb_strrchr": by default the result starts at the last needle (needle
     * included), with "$before_needle" it is the part in front of that needle (needle excluded).
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The requested part, or an empty string if the input or the needle is empty
     *                or the needle was not found.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // UTF-8 uses "mb_strrchr" directly, every other encoding goes through the class wrapper.
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8290
8291
    /**
     * Wraps $str in the given substring (once in front, once behind).
     *
     * @param string $str       <p>The string to wrap.</p>
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>$substring, followed by $str, followed by $substring again.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
8306
8307
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that are neither whitespace nor listed in
     * $word_define_chars. The first char of a word is upper-cased, the rest is lower-cased.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>An string of chars that will be used as
     *                                                           whitespace separator === words.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // The plain "mb_*" functions are only usable when no language specific rules are requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Quote the extra separator chars for usage inside the regex character class.
        // null and '' both become '' here. Do not use a truthiness check: it would also
        // throw away the valid separator string "0".
        $word_define_chars = \preg_quote((string) $word_define_chars, '/');

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // Words from the ignore-list are returned untouched (exact, case-sensitive match).
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                // Language aware path: lower-case the whole word, then upper-case its first char.
                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8402
8403
    /**
     * Convert a string into a obfuscate string.
     *
     * Randomly picked chars are replaced by $obfuscateChar until (roughly) $percent of all chars
     * were processed. Chars from $keepChars count as processed but are never replaced.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the chars to process, e.g. 0.5 for 50%.
     *                                Values above 1 behave like 1.</p>
     * @param string   $obfuscateChar
     * @param string[] $keepChars
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscate string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // Hide already existing obfuscate-chars behind a placeholder, so that they can be
        // distinguished from the replacements made below; they are restored at the end.
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);

        // Only chars that differ from the obfuscate-char can ever be picked by the loop below.
        $charsEligible = 0;
        foreach ($chars as $char) {
            if ($char !== $obfuscateChar) {
                ++$charsEligible;
            }
        }

        // Never ask for more chars than can be picked, otherwise the while-loop would never
        // terminate (e.g. for $percent > 1).
        $charsMaxChange = \min((float) $charsEligible, \round($charsMax * $percent));
        $charsCounter = 0;
        $charKeyDone = [];

        while ($charsCounter < $charsMaxChange) {
            // NOTE: foreach works on a copy, so $char is always the original (non-replaced) char.
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // Skip about every second candidate, so that the picked positions are random.
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // Protected chars use up quota but stay readable.
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
8469
8470
    /**
     * Returns a trimmed string in proper title case.
     *
     * Small words (a, an, and, ...) and the words from $ignore stay lower-case, unless they
     * start or end the title / a subphrase or are part of a hyphenated compound word.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // Regex fragments for the built-in small words; the caller's words are appended as-is.
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // An input without any lower-case char (e.g. all caps) is lower-cased first.
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        /**
         * Upper-case the first char of capture group 1.
         *
         * @param string[] $m
         *
         * @psalm-pure
         *
         * @return string
         */
        $capitalizeFirstGroup = static function (array $m) use ($encoding): string {
            return static::ucfirst($m[1], $encoding);
        };

        /**
         * Keep capture group 1 and upper-case the first char of capture group 2.
         *
         * @param string[] $m
         *
         * @psalm-pure
         *
         * @return string
         */
        $capitalizeSecondGroup = static function (array $m) use ($encoding): string {
            return $m[1] . static::ucfirst($m[2], $encoding);
        };

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $m
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $m) use ($encoding): string {
                if ($m[2]) {
                    // URLs, domains, emails and file paths are kept as they are
                    $word = $m[2];
                } elseif ($m[3]) {
                    // small words are lower-cased
                    $word = self::strtolower($m[3], $encoding);
                } elseif ($m[4]) {
                    // words without internal caps are capitalized
                    $word = static::ucfirst($m[4], $encoding);
                } else {
                    // every other kind of word is kept (iPhone)
                    $word = $m[5];
                }

                // leading + trailing underscores are preserved
                return $m[1] . $word . $m[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \\b # ...followed by small word
                     ~uxi',
            $capitalizeSecondGroup,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $capitalizeFirstGroup,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $capitalizeFirstGroup,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $capitalizeSecondGroup,
            $str
        );

        return $str;
    }
8660
8661
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one big binary number, so leading zero bits are
     * not part of the result (e.g. "a" -> "1100001") and an empty string results in "0".
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The binary digits. (false is only part of the signature for
     *                      backward compatibility and is not returned by this implementation.)</p>
     */
    public static function str_to_binary(string $str)
    {
        // Convert byte by byte: "base_convert()" works with floats internally and silently
        // loses precision for everything longer than a few bytes.
        $bits = '';
        $byteCount = \strlen($str);
        for ($i = 0; $i < $byteCount; ++$i) {
            $bits .= \str_pad(\decbin(\ord($str[$i])), 8, '0', \STR_PAD_LEFT);
        }

        // Keep the number-like output format: no leading zeros, but at least one digit.
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8684
8685
    /**
     * Split a string into its lines.
     *
     * The string is cut at every run of one or two line-break chars ("\r" and / or "\n").
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The lines. For an empty input (or if the split fails): an empty array
     *                  if $remove_empty_values is set, otherwise an array with one empty string.</p>
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // Result for "there are no lines".
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        // Only filter the lines if at least one of the filters was requested.
        if ($remove_empty_values || $remove_short_values !== null) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }
8724
8725
    /**
     * Convert a string into an array of words.
     *
     * The result alternates between "non-word" parts and "word" parts (the words are the
     * captured delimiters of the split), so it may start and end with an empty string.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // Regex character class for "word chars": letters plus the extra chars from the caller.
        $wordChars = self::rxClass($char_list, '\pL');

        // A word is a run of word chars; dashes and apostrophes are allowed between two runs.
        $parts = \preg_split("/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $remove_empty_values ? [] : [''];
        }

        if (!$remove_empty_values && $remove_short_values === null) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // Make sure that every remaining value is a string (the keys are kept).
        return \array_map(
            static function ($part): string {
                return (string) $part;
            },
            $reduced
        );
    }
8776
8777
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring alone already uses up the desired length, only the substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Nothing to do if the string is short enough.
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the substring.
                $length -= (int) \mb_strlen($substring);

                // No room left for the string itself. A negative length must not reach
                // "mb_substr", it would cut from the end and return more than requested.
                if ($length <= 0) {
                    return $substring;
                }
            }

            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // Nothing to do if the string is short enough.
        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // Reserve room for the substring.
            $length -= (int) self::strlen($substring, $encoding);

            // Same guard as in the UTF-8 branch.
            if ($length <= 0) {
                return $substring;
            }
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
8836
8837
    /**
     * Shortens a string to at most $length characters without cutting a word
     * in half. When truncation happens, room for $substring is reserved first
     * and $substring is appended to the result.
     *
     * Details:
     * - An empty $str or a $length <= 0 returns $substring unchanged.
     * - A $str that already fits into $length is returned as-is (no $substring).
     * - If the hard cut ends exactly in front of a space, the cut is kept.
     * - Otherwise the result is cut back to the last space inside the kept part.
     *   If the kept part contains no space, but a space follows later in $str,
     *   the kept part is dropped completely, unless
     *   $ignore_do_not_split_words_for_one_word is true.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default: ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Keep a hard-cut single word instead of
     *                                                       dropping it. Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // reserve room for the substring that gets appended
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $head = self::substr($str, 0, $length, $encoding);
            if ($head === false) {
                return '';
            }

            // a space right behind the kept part means no word was split
            $next_space = self::strpos($str, ' ', $length - 1, $encoding);
            if ($next_space === $length) {
                return $head . $substring;
            }

            // otherwise fall back to the last space inside the kept part
            $last_space = self::strrpos($head, ' ', 0, $encoding);
            $cut_back = $last_space !== false
                        ||
                        ($next_space !== false && !$ignore_do_not_split_words_for_one_word);
            if ($cut_back) {
                $head = (string) self::substr($head, 0, (int) $last_space, $encoding);
            }

            return $head . $substring;
        }

        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $head - needed for PhpStan (stubs error) */
        $head = \mb_substr($str, 0, $length);
        if ($head === false) {
            return '';
        }

        // a space right behind the kept part means no word was split
        $next_space = \mb_strpos($str, ' ', $length - 1);
        if ($next_space === $length) {
            return $head . $substring;
        }

        // otherwise fall back to the last space inside the kept part
        $last_space = \mb_strrpos($head, ' ', 0);
        $cut_back = $last_space !== false
                    ||
                    ($next_space !== false && !$ignore_do_not_split_words_for_one_word);
        if ($cut_back) {
            $head = (string) \mb_substr($head, 0, (int) $last_space);
        }

        return $head . $substring;
    }
8943
8944
    /**
     * Returns the string in "underscored" form.
     *
     * This is a thin wrapper: it delegates to self::str_delimit() with '_' as
     * the delimiter. The result is documented as lowercase and trimmed, with
     * underscores inserted before uppercase characters (except the first
     * character of the string) and in place of spaces and dashes.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8961
8962
    /**
     * Returns an UpperCamelCase version of the supplied string.
     *
     * The string is first passed through self::str_camelize() (with $str and
     * $encoding only), then its first character is upper-cased via
     * self::ucfirst(), which receives the remaining options.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8989
8990
    /**
     * Count the words of a string, or return the words themselves.
     *
     * The string is split by self::str_to_words(); the entries at odd indexes
     * of that result are treated as words, the entries in between as the
     * text separating them.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words (format 0) or the list of words (format 1 and 2).</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        $parts = self::str_to_words($str, $char_list);
        $part_count = \count($parts);

        // every format other than 1 and 2 only reports the number of words
        if ($format !== 1 && $format !== 2) {
            return (int) (($part_count - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($idx = 1; $idx < $part_count; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // format 2: key every word by its character offset, starting behind
        // the leading separator part
        $position = (int) self::strlen($parts[0]);
        for ($idx = 1; $idx < $part_count; $idx += 2) {
            $words[$position] = $parts[$idx];
            // advance past the word and the separator part that follows it
            $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
        }

        return $words;
    }
9047
9048
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(): both strings are
     * case-folded via UTF8::strtocasefold() and then handed to UTF8::strcmp().
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
9090
9091
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings short-circuit to 0. Otherwise both strings are
     * normalized to NFD before the byte-wise comparison, so canonically
     * equivalent strings (e.g. "é" vs. "e" + combining acute) compare as equal.
     *
     * If the normalization of a string fails (\Normalizer::normalize() returns
     * false, e.g. for malformed UTF-8), the raw string is compared instead, so
     * two different invalid strings are never reported as equal.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // normalize() signals failure with false; never pass that to \strcmp()
        return \strcmp(
            $normalized1 === false ? $str1 : $normalized1,
            $normalized2 === false ? $str2 : $normalized2
        );
    }
9119
9120
    /**
     * Find the length of the initial segment that contains none of the
     * characters from $char_list.
     *
     * If $offset and/or $length are given, only that slice of $str is
     * inspected. An empty $char_list yields the full length of $str.
     *
     * @param string   $str
     * @param string   $char_list <p>The characters that end the segment.</p>
     * @param int      $offset    [optional] <p>Start of the slice to inspect.</p>
     * @param int|null $length    [optional] <p>Length of the slice to inspect.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The segment length; 0 if the slice is empty or cannot be extracted.</p>
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // narrow the input down to the requested slice first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $slice = self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $slice = \mb_substr($str, $offset, $length);
            } else {
                $slice = \mb_substr($str, $offset);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char of $char_list
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            // none of the chars occurs: the whole string is the segment
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($found[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
9182
9183
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every entry is cast with (int) and written as a numeric HTML entity
     * ("&#NNN;"); the resulting entity string is then decoded via mbstring.
     * Because of the (int) cast, string entries have to be decimal numeric
     * strings.
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string, or an empty string if the conversion fails.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        if (!\is_array($intOrHex)) {
            $intOrHex = [$intOrHex];
        }

        $str = '';
        foreach ($intOrHex as $strPart) {
            $str .= '&#' . (int) $strPart . ';';
        }

        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        $decoded = \mb_convert_encoding($str, 'UTF-8', 'HTML-ENTITIES');

        // mb_convert_encoding() is not guaranteed to return a string
        // (false on failure), so guard the ": string" return type.
        return \is_string($decoded) ? $decoded : '';
    }
9218
9219
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is compared against every entry of self::$BOM: the array key
     * is the BOM byte sequence, the array value is the number of leading bytes
     * to compare.
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function string_has_bom(string $str): bool
    {
        // iterate by value: the list is only read, and a by-reference loop
        // would leave a dangling reference into the static BOM table
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9244
9245
    /**
     * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    Tags which should not be stripped. If null, the native strip_tags()
     *                                    is called without a second argument.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Pass the string through UTF8::clean() before stripping.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        $input = $clean_utf8 ? self::clean($str) : $str;

        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9289
9290
    /**
     * Remove every run of whitespace from the string.
     *
     * All matches of the POSIX class [[:space:]] are dropped; the pattern runs
     * in unicode mode ("u" modifier). This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
9311
9312
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * Strategy: mbstring if available; otherwise grapheme_stripos() (UTF-8 and
     * non-negative offset only), then the native stripos() for pure ASCII
     * input, and finally a case-fold + UTF8::strpos() fallback.
     *
     * An empty needle yields false on PHP < 8; an empty haystack yields 0 only
     * for an empty needle on PHP >= 8, and false otherwise.
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return ($needle === '' && \PHP_VERSION_ID >= 80000) ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_stripos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        return self::strpos(
            self::strtocasefold($haystack, true, false, $encoding, null, false),
            self::strtocasefold($needle, true, false, $encoding, null, false),
            $offset,
            $encoding
        );
    }
9401
9402
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case-insensitive).
     *
     * Strategy: mbstring if available; otherwise grapheme_stristr() (UTF-8
     * only), then the native stristr() for pure ASCII input, and finally a
     * regex based fallback. Without mbstring a warning is triggered for
     * encodings other than UTF-8.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return ($needle === '' && \PHP_VERSION_ID >= 80000) ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stristr($haystack, $needle, $before_needle, $encoding);
            }

            return \mb_stristr($haystack, $needle, $before_needle);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via regex: lazily capture everything in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $prefix = $found[1];
        if ($before_needle) {
            return $prefix;
        }

        return self::substr($haystack, (int) self::strlen($prefix, $encoding), null, $encoding);
    }
9507
9508
    /**
     * Get the string length, not the byte-length!
     *
     * INFO: use UTF8::strwidth() for the char-length
     *
     * Strategy: mbstring if available; otherwise the byte length for
     * CP850 / ASCII, then iconv, then intl (UTF-8 only), then the byte length
     * for pure ASCII input, and finally a regex based character count.
     *
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn")); // 20</code>
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the characters matched in unicode mode
        //

        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        // no match at all for a non-empty string: the length is unknown
        return $char_count === 0 ? false : $char_count;
    }
9638
9639
    /**
     * Get string length in byte.
     *
     * If the "mbstring_func_overload" support flag is set, the bytes are
     * counted via mb_strlen() with the 8-bit "CP850" encoding, otherwise the
     * native strlen() is used.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
9661
9662
    /**
     * Case-insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp(): both strings are
     * case-folded via UTF8::strtocasefold() and then handed to UTF8::strnatcmp().
     *
     * EXAMPLES: <code>
     * UTF8::strnatcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
9693
9694
    /**
     * Compare two strings using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * Identical strings short-circuit to 0; otherwise both strings are passed
     * through UTF8::strtonatfold() and compared with the native "strnatcmp()".
     *
     * EXAMPLES: <code>
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            $folded_first = (string) self::strtonatfold($str1);
            $folded_second = (string) self::strtonatfold($str2);

            return \strnatcmp($folded_first, $folded_second);
        }

        // byte-identical input, nothing to fold or compare
        return 0;
    }
9730
9731
    /**
     * Case-insensitive comparison of the first n characters of two strings.
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then handed
     * over to UTF8::strncmp().
     *
     * EXAMPLE: <code>
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded_first, $folded_second, $len);
    }
9764
9765
    /**
     * Compare the first n characters of two strings.
     *
     * Both strings are cut to their first $len characters and the results are
     * handed over to UTF8::strcmp().
     *
     * EXAMPLE: <code>
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            // every other charset goes through the encoding-aware wrapper
            return self::strcmp(
                (string) self::substr($str1, 0, $len, $encoding),
                (string) self::substr($str2, 0, $len, $encoding)
            );
        }

        // UTF-8: call "mb_substr()" directly
        return self::strcmp(
            (string) \mb_substr($str1, 0, $len),
            (string) \mb_substr($str2, 0, $len)
        );
    }
9806
9807
    /**
     * Search a string for any character out of a set of characters.
     *
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The string starting from the character found, or false if it is not found
     *                      (also false if one of the inputs is empty).</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // build a regex from the character list and look for the first hit
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $match)) {
            return false;
        }

        // byte position of the matched character, the rest of the string is returned as-is
        $byte_pos = (int) \strpos($haystack, $match[0]);

        return \substr($haystack, $byte_pos);
    }
9834
9835
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * INFO: use UTF8::strpos_in_byte() for the byte-length
     *
     * The search is delegated to the first available backend, in this order:
     * mbstring, native "strpos()" (for "CP850" / "ASCII"), intl, iconv,
     * native "strpos()" (for pure ASCII input) and finally a vanilla php fallback.
     *
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.
     *                               A negative offset counts from the end of the string.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000) {
                if ($needle === '') {
                    return 0;
                }
            } else {
                return false;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Absolute start (in characters) of the part that is searched. A negative
        // offset counts from the end of the haystack, so it must be converted
        // into a position from the beginning; otherwise the result would be
        // relative to the cut-off tail instead of the whole haystack.
        if ($offset < 0) {
            $start = (int) self::strlen($haystack, $encoding) + $offset;
            if ($start < 0) {
                $start = 0;
            }
        } else {
            $start = $offset;
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack_tail = (string) $haystack_tmp;

        // byte position inside the tail ...
        $pos = \strpos($haystack_tail, $needle);
        if ($pos === false) {
            return false;
        }

        // ... converted into a character position and shifted by the start of the tail
        if ($pos) {
            return $start + (int) self::strlen(\substr($haystack_tail, 0, $pos), $encoding);
        }

        return $start;
    }
10008
10009
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strpos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
10041
10042
    /**
     * Find the byte position of the first occurrence of a substring in a string, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \stripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_stripos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
10074
10075
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * NOTE: without mbstring, only the first character of the needle is searched
     * (via iconv or the vanilla php fallback).
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or false if needle is not found.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both fallbacks only look for the first character of the needle
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $pos = \iconv_strrpos($haystack, $first_char, $encoding);
        } else {
            $pos = self::strrpos($haystack, $first_char, 0, $encoding);
        }

        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
10205
10206
    /**
     * Reverse the order of the characters in a string.
     *
     * The string is passed through UTF8::emoji_encode() before it is reversed and
     * through UTF8::emoji_decode() afterwards. For UTF-8 the string is walked
     * backwards via "grapheme_*" (if intl is available) or via "mb_*" functions;
     * every other encoding uses UTF8::strlen() and UTF8::substr().
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($idx = (int) self::strlen($str, $encoding) - 1; $idx >= 0; --$idx) {
                $piece = self::substr($str, $idx, 1, $encoding);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($idx = (int) \grapheme_strlen($str) - 1; $idx >= 0; --$idx) {
                $piece = \grapheme_substr($str, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } else {
            for ($idx = (int) \mb_strlen($str) - 1; $idx >= 0; --$idx) {
                $piece = \mb_substr($str, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10263
10264
    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * NOTE: without mbstring, only the first character of the needle is searched
     * (via UTF8::strripos()).
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // only the first character of the needle is searched here
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $pos = self::strripos($haystack, $first_char, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
10344
10345
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * The search is delegated to the first available backend, in this order:
     * mbstring, native "strripos()" (for "CP850" / "ASCII"), intl,
     * native "strripos()" (for pure ASCII input) and finally
     * UTF8::strrpos() on case-folded strings.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
                       &&
                       $offset >= 0 // grapheme_strripos() can't handle negative offset
                       &&
                       self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $intl_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($intl_pos !== false) {
                return $intl_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings, then run the case-sensitive search
        $haystack = self::strtocasefold($haystack, true, false, $encoding);
        $needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($haystack, $needle, $offset, $encoding, $clean_utf8);
    }
10476
10477
    /**
     * Find the byte position of the last occurrence of a string within another, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (also false if
     *                   haystack or needle is empty).</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strripos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
10511
10512
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, native byte functions (for "CP850" / "ASCII"), intl (grapheme),
     * native byte functions (for pure ASCII input) and finally a vanilla PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // empty haystack: decide as early as possible, before the needle is converted
        if ($haystack === '') {
            if (!$is_php8) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle, so turn a code point into its character
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // same decision again, now that the needle is guaranteed to be a string
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // grapheme_strrpos() can neither handle a negative offset nor other encodings
        if ($offset >= 0 && $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the window that should be searched.
        // NOTE(review): for a negative offset the tail is cut off, so a match has to end
        // before the cut; the native strrpos() seems to accept a match that only starts
        // before it - confirm whether this difference matters.
        if ($offset !== 0) {
            if ($offset > 0) {
                $window = self::substr($haystack, $offset);
            } else {
                $window = self::substr($haystack, 0, $offset);
                $offset = 0;
            }

            if ($window !== null) {
                $haystack = $window === false ? '' : (string) $window;
            }
        }

        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        // translate the byte position into a character position
        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
10671
10672
    /**
     * Byte-wise search for the position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, or if one of the two
     *                   strings is empty, it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // an empty needle or haystack never matches here
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // function overloading is active, so "mb_" is available: use it with an 8-bit charset
        return \mb_strrpos($haystack, $needle, $offset, 'CP850');
    }
10706
10707
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // '3'</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start of the part of the string that is examined.</p>
     * @param int|null $length   [optional] <p>Length of the part of the string that is examined.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The length of the matching initial segment, 0 if nothing matches
     *                   or if the (sliced) string or the mask is empty.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested slice first
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the leading run of mask characters and measure it
        $leading_run = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $leading_run)) {
            return 0;
        }

        return (int) self::strlen($leading_run[0], $encoding);
    }
10754
10755
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, native byte functions (for "CP850" / "ASCII"), intl (grapheme),
     * native byte functions (for pure ASCII input) and finally a regex based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // checked after the cleanup, because the cleanup may change the needle
        if ($needle === '') {
            return $is_php8 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $parts);

        if (!isset($parts[1])) {
            return false;
        }

        return $before_needle
            ? $parts[1]
            : self::substr($haystack, (int) self::strlen($parts[1]));
    }
10888
10889
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found or if one of the two strings is empty.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // an empty needle or haystack never matches here
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // function overloading is active, so "mb_" is available: use it with an 8-bit charset
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
    }
10930
10931
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The case-folded string.</p>
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before the string is transformed
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // a special language or a non-default charset is handled by the full-blown case helpers
        if ($lang !== null || $encoding !== 'UTF-8') {
            return $lower
                ? self::strtolower($str, $encoding, false, $lang)
                : self::strtoupper($str, $encoding, false, $lang);
        }

        // plain UTF-8: go straight to "mb_"
        return $lower ? \mb_strtolower($str) : \mb_strtoupper($str);
    }
10987
10988
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before the string is transformed
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without a special language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // language specific lowercasing via the intl transliterator
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11067
11068
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before the string is transformed
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without a special language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is loaded here, the language itself is the problem:
                    // report it the same way as strtolower() does
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                return (string) \transliterator_transliterate($language_code, $str);
            }

            // the warning has to name this method, not strtolower()
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11147
11148
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * Behaviour in short:
     * - $to is not '': $from and $to are turned into character lists (unless they already are arrays),
     *   the longer list is truncated to the length of the shorter one and every element of $from is
     *   translated into the element of $to at the same position.
     * - $to is '' and $from is an array: $from is used as a "search => replacement" map.
     * - $to is '' and $from is a string: every occurrence of $from is removed.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if $from and $to cannot be combined into a translation map
     *
     * @psalm-pure
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // truncate the longer list, so that both lists pair up one-to-one
            $count_from = \count($from);
            $count_to = \count($to);

            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep $from untouched until the result is known to be valid,
            // otherwise the exception message could not show the real input anymore
            $translation_map = \array_combine($from, $to);
            if ($translation_map === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $translation_map;
        }

        // only reachable with an empty $to: remove every occurrence of $from
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        return \strtr($str, $from);
    }
11214
11215
    /**
     * Return the display width of a string.
     *
     * INFO: this is a column width, not a character count; in the non-mbstring
     * fallback every character from the wide (mostly East Asian) ranges is
     * counted as two columns and every other character as one.
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        // only unknown encoding names need to be normalized
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8 ? \mb_strwidth($str) : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if (!$is_utf8) {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and remember how many were removed
        $wide_count = 0;
        $narrow_part = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // wide characters take two columns, the remaining ones a single column
        return ($wide_count * 2) + (int) self::strlen($narrow_part);
    }
11274
11275
    /**
     * Get part of a string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty
     *                      input, a zero <i>length</i>, or (in the fallback code) an
     *                      <i>offset</i> at or behind the end of the string.</p>
     *                      <p><b>FALSE</b> is returned if the string length cannot be determined
     *                      in the fallback code; the native functions used by the fast paths
     *                      may also return <b>FALSE</b>, depending on the PHP version.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if ($offset === 0 && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fast path via mbstring (UTF-8 only)
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length, but only if it is used below
        $str_length = 0;
        if ($offset !== 0 || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // offset points exactly behind the last character and no length was given
        // INFO: "$length" is never 0 here (handled at the top), so a strict null check is sufficient
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // offset behind the end of the string
        if ($offset !== 0 && $offset > $str_length) {
            return '';
        }

        $length = $length ?? $str_length;

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split into an array of characters, extract the relevant part
        // and join it to a string again
        return \implode('', \array_slice(self::str_split($str), $offset, $length));
    }
11433
11434
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. If omitted, the rest of
     *                                     <i>str1</i> (starting at the offset) is compared.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // cut the second string down to the size of the compared window
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // the window was cut using "$encoding", so it has to be measured
                // with the same encoding before the second string is shortened
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
11495
11496
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or <b>FALSE</b> if the needle is empty
     *                   or the length of the haystack cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // an empty haystack or an empty search window can't contain the needle
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window
        // NOTE: a negative "$length" is only applied in combination with a non-zero "$offset"
        if ($offset || $length > 0) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
11593
11594
    /**
     * Count the number of substring occurrences, processed in bytes.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // without function overloading the native function is already byte based
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // with function overloading: cut out the search window first ...
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack);
                if ($haystack_length === false) {
                    return false;
                }
                $length = $haystack_length;
            }

            $is_empty_window = $length !== 0
                               &&
                               $offset !== 0
                               &&
                               ($length + $offset) <= 0;
            // output from "substr_count()" have changed in PHP 7.1
            if ($is_empty_window && \PHP_VERSION_ID < 71000) {
                return false;
            }

            /** @var false|string $window - needed for PhpStan (stubs error) */
            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        // ... "mb_" is available if overload is used, so use it with an 8-BIT charset
        return \mb_substr_count($haystack, $needle, 'CP850');
    }
11675
11676
    /**
     * Count how often $substring occurs in the given string.
     *
     * The comparison is case-sensitive by default; pass false as $case_sensitive
     * to ignore the case.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences, 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // the plain "UTF-8" name takes the short path without an explicit charset
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($case_sensitive) {
            if ($is_utf8) {
                return (int) \mb_substr_count($str, $substring);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        // case-insensitive: bring both strings into the same case before counting
        if ($is_utf8) {
            $str_upper = \mb_strtoupper($str);
            $substring_upper = \mb_strtoupper($substring);

            return (int) \mb_substr_count($str_upper, $substring_upper);
        }

        $str_folded = self::strtocasefold($str, true, false, $encoding, null, false);
        $substring_folded = self::strtocasefold($substring, true, false, $encoding, null, false);

        return (int) \mb_substr_count($str_folded, $substring_folded, $encoding);
    }
11723
11724
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), ignoring the case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to search in, or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
11756
11757
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position (in bytes) used in str.</p>
     * @param int|null $length [optional] <p>The maximum length (in bytes) of the returned string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. The native functions used here may return
     *                      <b>FALSE</b> for an out-of-range <i>offset</i>, depending on the PHP
     *                      version.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no length given: let PHP return everything up to the end of the string,
        // instead of passing a hard-coded maximum length
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
11790
11791
    /**
     * Strip a suffix ($needle) from the end of the string ($haystack), ignoring the case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it
     *                does not end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to search in, or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // number of characters in front of the suffix
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
11823
11824
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), case-sensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to search in, or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
11856
11857
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given for a single string, the replacing
     *                                     ends at the end of the string. If it is not given for an array of
     *                                     strings, a length of zero is used, i.e. the replacement is inserted at
     *                                     the given start offset (see the example).</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if "$offset" or "$length" is an array, but "$str" is not
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     *
     * @template TSubstrReplace
     * @phpstan-param TSubstrReplace $str
     * @phpstan-return TSubstrReplace
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // nothing to process: without this guard the padded scalar arguments below
            // would still contain one element, so that "array_map()" would create a
            // result element out of nothing
            if ($num === 0) {
                return [];
            }

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer entries are replaced by 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // non-integer entries are replaced by the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // the encoding has to be handed over to every recursive call as well,
            // otherwise the array variant would always fall back to the default
            $encoding_list = \array_fill(0, $num, $encoding);

            // recursive call
            /** @phpstan-ignore-next-line - phpstan currently can't handle recursive calls */
            return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length, $encoding_list);
        }

        // a single string only uses the first replacement
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into the range 0 ... string-length
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing length into a positive one
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // do not replace beyond the end of the string
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // part in front of the replaced portion + replacement + part behind it
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into arrays of characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12015
12016
    /**
12017
     * Removes a suffix ($needle) from the end of the string ($haystack).
12018
     *
12019
     * EXAMPLE: <code>
12020
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
12021
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
12022
     * </code>
12023
     *
12024
     * @param string $haystack <p>The string to search in.</p>
12025
     * @param string $needle   <p>The substring to search for.</p>
12026
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
12027
     *
12028
     * @psalm-pure
12029
     *
12030
     * @return string
12031
     *                <p>Return the sub-string.</p>
12032
     */
12033
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // An empty haystack has nothing to strip; an empty needle strips nothing.
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        // Byte-wise suffix comparison (case-sensitive): without a match the haystack is returned untouched.
        $has_suffix = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$has_suffix) {
            return $haystack;
        }

        // Fast path: call the mb_* functions directly for the default encoding.
        if ($encoding === 'UTF-8') {
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        // Any other encoding goes through the encoding-aware wrappers of this class.
        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
12065
12066
    /**
12067
     * Returns a case swapped version of the string.
12068
     *
12069
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
12070
     *
12071
     * @param string $str        <p>The input string.</p>
12072
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12073
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12074
     *
12075
     * @psalm-pure
12076
     *
12077
     * @return string
12078
     *                <p>Each character's case swapped.</p>
12079
     */
12080
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences first, so the case conversion only sees well-formed input
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // Fast path: "lower ^ upper ^ str" swaps the case byte by byte. This is only meaningful
        // while all three strings line up, i.e. the case mapping did not change any byte length.
        $byte_length = \strlen($str);
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            return (string) ($lower ^ $upper ^ $str);
        }

        // Slow path: at least one character changed its byte length when converted
        // (e.g. "ı" [2 bytes] -> "I" [1 byte]), so the XOR would be misaligned and truncated
        // to the shortest operand. Swap the case character by character instead.
        if ($is_utf8) {
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            if ($chars === false) {
                // not valid UTF-8, so it cannot be split into characters: keep the previous result
                return (string) ($lower ^ $upper ^ $str);
            }
        } else {
            $chars = [];
            $char_count = (int) self::strlen($str, $encoding);
            for ($i = 0; $i < $char_count; ++$i) {
                $chars[] = (string) self::substr($str, $i, 1, $encoding);
            }
        }

        $swapped = '';
        foreach ($chars as $char) {
            $char_lower = $is_utf8 ? \mb_strtolower($char) : self::strtolower($char, $encoding);

            if ($char !== $char_lower) {
                // the character was not lowercase -> use its lowercase form
                $swapped .= $char_lower;

                continue;
            }

            // lowercase (or caseless) character -> use its uppercase form
            $swapped .= $is_utf8 ? \mb_strtoupper($char) : self::strtoupper($char, $encoding);
        }

        return $swapped;
    }
12098
12099
    /**
12100
     * Checks whether symfony-polyfills are used.
12101
     *
12102
     * @psalm-pure
12103
     *
12104
     * @return bool
12105
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
12106
     *
12107
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
12108
     */
12109
    public static function symfony_polyfill_used(): bool
    {
        // A polyfill is in play when the function exists although the native extension is not loaded.
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12126
12127
    /**
12128
     * @param string $str
12129
     * @param int    $tab_length
12130
     *
12131
     * @psalm-pure
12132
     *
12133
     * @return string
12134
     */
12135
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // Every tab character becomes a run of $tab_length spaces.
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12147
12148
    /**
12149
     * Converts the first character of each word in the string to uppercase
12150
     * and all other chars to lowercase.
12151
     *
12152
     * @param string      $str                           <p>The input string.</p>
12153
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
12154
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
12155
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
12156
     *                                                   tr</p>
12157
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
12158
     *                                                   -> ß</p>
12159
     *
12160
     * @psalm-pure
12161
     *
12162
     * @return string
12163
     *                <p>A string with all characters of $str being title-cased.</p>
12164
     */
12165
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // drop invalid UTF-8 sequences before any case conversion happens
            $str = self::clean($str);
        }

        // Language specific rules or length preservation are handled by str_titleize().
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        // Plain case: let mbstring do the work, skipping the encoding lookup for the default.
        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
12202
12203
    /**
12204
     * Convert a string into ASCII.
12205
     *
12206
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
12207
     *
12208
     * @param string $str     <p>The input string.</p>
12209
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
12210
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
12211
     *                        performance</p>
12212
     *
12213
     * @psalm-pure
12214
     *
12215
     * @return string
12216
     */
12217
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // Thin facade: the transliteration itself is implemented by the ASCII helper class.
        $ascii = ASCII::to_transliterate($str, $unknown, $strict);

        return $ascii;
    }
12224
12225
    /**
12226
     * @param bool|float|int|string $str
12227
     *
12228
     * @psalm-pure
12229
     *
12230
     * @return bool
12231
     */
12232
    public static function to_boolean($str): bool
    {
        $str = (string) $str;

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        //
        // The keywords are matched case-insensitively (exact spelling, no surrounding whitespace).
        $keyword = \strtolower($str);

        if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        // Any other numeric string is true only when it is greater than zero.
        if (\is_numeric($str)) {
            return ((float) $str) > 0;
        }

        // Everything else follows PHP's truthiness of the trimmed string.
        return (bool) \trim($str);
    }
12268
12269
    /**
12270
     * Convert given string to safe filename (and keep string case).
12271
     *
12272
     * @param string $str
12273
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
12274
     *                                  simply replaced with hyphen.
12275
     * @param string $fallback_char
12276
     *
12277
     * @psalm-pure
12278
     *
12279
     * @return string
12280
     */
12281
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        // Thin facade: the conversion itself is implemented by the ASCII helper class.
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
12292
12293
    /**
12294
     * Convert a string into "ISO-8859"-encoding (Latin-1).
12295
     *
12296
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
12297
     *
12298
     * @param string|string[] $str
12299
     *
12300
     * @psalm-pure
12301
     *
12302
     * @return string|string[]
12303
     *
12304
     * @template TToIso8859
12305
     * @phpstan-param TToIso8859 $str
12306
     * @phpstan-return TToIso8859
12307
     */
12308
    public static function to_iso8859($str)
    {
        // Scalar input: cast to string and decode it (an empty string needs no work).
        if (!\is_array($str)) {
            $str = (string) $str;

            return $str === '' ? '' : self::utf8_decode($str);
        }

        // Array input: convert every element (recursively) and keep the keys.
        foreach ($str as $key => $value) {
            $str[$key] = self::to_iso8859($value);
        }

        return $str;
    }
12325
12326
    /**
12327
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
12328
     *
12329
     * <ul>
12330
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
12331
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
12332
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
12333
     * case.</li>
12334
     * </ul>
12335
     *
12336
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
12337
     *
12338
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
12339
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
12340
     *
12341
     * @psalm-pure
12342
     *
12343
     * @return string|string[]
12344
     *                         <p>The UTF-8 encoded string</p>
12345
     *
12346
     * @template TToUtf8
12347
     * @phpstan-param TToUtf8 $str
12348
     * @phpstan-return TToUtf8
12349
     */
12350
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (\is_array($str)) {
            // Convert every element and keep the keys. Elements are passed back through
            // to_utf8(), so nested arrays are converted recursively (like to_iso8859() does)
            // instead of failing the "string" type of to_utf8_string().
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decode_html_entity_to_utf8);
            }

            /** @phpstan-var TToUtf8 $str */
            return $str;
        }

        /** @phpstan-var TToUtf8 $str */
        $str = self::to_utf8_string($str, $decode_html_entity_to_utf8);

        return $str;
    }
12366
12367
    /**
12368
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
12369
     *
12370
     * <ul>
12371
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
12372
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
12373
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
12374
     * case.</li>
12375
     * </ul>
12376
     *
12377
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
12378
     *
12379
     * @param string $str                        <p>Any string.</p>
12380
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
12381
     *
12382
     * @psalm-pure
12383
     *
12384
     * @return string
12385
     *                <p>The UTF-8 encoded string</p>
12386
     */
12387
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $converted = '';
        $pos = 0;

        // Pass 1: walk the raw bytes. Sequences that already look like UTF-8 are copied,
        // every other non-ASCII byte is handed to the conversion helper.
        while ($pos < $byte_count) {
            $lead = $str[$pos];

            if ($lead >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already
                // how many continuation bytes (0x80-0xBF) a sequence with this lead byte needs
                if ($lead <= "\xDF") {
                    $expected = 1; // looks like 2 bytes UTF8
                } elseif ($lead <= "\xEF") {
                    $expected = 2; // looks like 3 bytes UTF8
                } elseif ($lead <= "\xF7") {
                    $expected = 3; // looks like 4 bytes UTF8
                } else {
                    $expected = 0; // doesn't look like UTF8, but should be converted
                }

                $sequence = $lead;
                $looks_like_utf8 = $expected > 0;
                for ($k = 1; $looks_like_utf8 && $k <= $expected; ++$k) {
                    // bytes beyond the end of the string count as "\x00" (= not a continuation byte)
                    $next = ($pos + $k >= $byte_count) ? "\x00" : $str[$pos + $k];

                    if ($next >= "\x80" && $next <= "\xBF") {
                        $sequence .= $next;
                    } else {
                        $looks_like_utf8 = false;
                    }
                }

                if ($looks_like_utf8) { // yeah, almost sure it's UTF8 already
                    $converted .= $sequence;
                    $pos += $expected;
                } else { // not valid UTF8 - convert it
                    $converted .= self::to_utf8_convert_helper($lead);
                }
            } elseif (($lead & "\xC0") === "\x80") { // stray continuation byte - needs conversion
                $converted .= self::to_utf8_convert_helper($lead);
            } else { // it doesn't need conversion
                $converted .= $lead;
            }

            ++$pos;
        }

        // Pass 2: decode unicode escape sequences + unicode surrogate pairs
        $converted = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high_surrogate = (int) \hexdec($matches[1]);
                    $low_surrogate = (int) \hexdec($matches[2]);

                    $code_point = ($high_surrogate << 10)
                                  + $low_surrogate
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6))
                           . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $converted
        );

        // preg_replace_callback() yields null on a regex error
        if ($converted === null) {
            return '';
        }

        // Pass 3 (optional): decode html-entities
        if ($decode_html_entity_to_utf8) {
            $converted = self::html_entity_decode($converted);
        }

        return $converted;
    }
12498
12499
    /**
12500
     * Returns the given string as an integer, or null if the string isn't numeric.
12501
     *
12502
     * @param string $str
12503
     *
12504
     * @psalm-pure
12505
     *
12506
     * @return int|null
12507
     *                  <p>null if the string isn't numeric</p>
12508
     */
12509
    public static function to_int(string $str)
    {
        // Only numeric strings are cast; everything else is reported as null.
        return \is_numeric($str) ? (int) $str : null;
    }
12517
12518
    /**
12519
     * Returns the given input as string, or null if the input isn't int|float|string
12520
     * and do not implement the "__toString()" method.
12521
     *
12522
     * @param float|int|object|string|null $input
12523
     *
12524
     * @psalm-pure
12525
     *
12526
     * @return string|null
12527
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
12528
     */
12529
    public static function to_string($input)
    {
        // Strings, integers and floats can always be cast.
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // Objects are only cast when they can render themselves.
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // null, booleans, arrays, resources and objects without "__toString()"
        return null;
    }
12557
12558
    /**
12559
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
12560
     *
12561
     * INFO: This is slower than "trim()"
12562
     *
12563
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
12564
     * but the check for ASCII (7-Bit) costs more time than we would save here.
12565
     *
12566
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
12567
     *
12568
     * @param string      $str   <p>The string to be trimmed</p>
12569
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
12570
     *
12571
     * @psalm-pure
12572
     *
12573
     * @return string
12574
     *                <p>The trimmed string.</p>
12575
     */
12576
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if ($chars === '') {
            // An empty char list means "strip nothing" (like the native trim()). Without this
            // guard the pattern below would become "^[]+|[]+$", whose brackets no longer
            // delimit the intended character set.
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars === null) {
            // default: strip whitespace from both ends
            $pattern = '^[\\s]+|[\\s]+$';
        } else {
            // mb_ereg_replace() takes the pattern without delimiters, the preg based
            // fallback wraps it in "/", so "/" only needs escaping for the fallback.
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');

            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$quoted_chars}]+|[{$quoted_chars}]+\$";
        }

        if ($use_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
12603
12604
    /**
12605
     * Makes string's first char uppercase.
12606
     *
12607
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
12608
     *
12609
     * @param string      $str                           <p>The input string.</p>
12610
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
12611
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
12612
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
12613
     *                                                   tr</p>
12614
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
12615
     *                                                   -> ß</p>
12616
     *
12617
     * @psalm-pure
12618
     *
12619
     * @return string
12620
     *                <p>The resulting string with its first char in uppercase.</p>
12621
     */
12622
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid UTF-8 sequences before the string is split
            $str = self::clean($str);
        }

        // Without language rules / length preservation the plain mb_* functions are sufficient.
        $plain_mb_conversion = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            $first_char = $plain_mb_conversion
                ? \mb_strtoupper($first_char)
                : self::strtoupper($first_char, $encoding, false, $lang, $try_to_keep_the_string_length);

            return $first_char . $remainder;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $remainder = (string) self::substr($str, 1, null, $encoding);

        if ($plain_mb_conversion) {
            $first_char = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $first_char = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $first_char . $remainder;
    }
12680
12681
    /**
12682
     * Uppercase for all words in the string.
12683
     *
12684
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
12685
     *
12686
     * @param string   $str        <p>The input string.</p>
12687
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
12688
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
12689
     *                             word.</p>
12690
     * @param string   $encoding   [optional] <p>Set the charset.</p>
12691
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12692
     *
12693
     * @psalm-pure
12694
     *
12695
     * @return string
12696
     */
12697
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Compare against '' explicitly: a loose "!$str" would also treat the string "0" as empty.
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            // drop invalid UTF-8 sequences before the string is split into words
            $str = self::clean($str);
        }

        // The native ucwords() is only an option if neither extra word-chars nor exceptions
        // were given (strict comparison, so that a "0" is not mistaken for "nothing given").
        $use_php_default_functions = $char_list === '' && \implode('', $exceptions) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty chunks only; a chunk like "0" is real content and must be kept
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                // excluded words are appended unchanged
                $words_str .= $word;
            }
        }

        return $words_str;
    }
12749
12750
    /**
12751
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
12752
     *
12753
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
12754
     *
12755
     * e.g:
12756
     * 'test+test'                     => 'test test'
12757
     * 'D&#252;sseldorf'               => 'Düsseldorf'
12758
     * 'D%FCsseldorf'                  => 'Düsseldorf'
12759
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
12760
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
12761
     * 'Düsseldorf'                   => 'Düsseldorf'
12762
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
12763
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
12764
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
12765
     *
12766
     * @param string $str          <p>The input string.</p>
12767
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
12768
     *
12769
     * @psalm-pure
12770
     *
12771
     * @return string
12772
     */
12773
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding round is always done; with $multi_decode the rounds are repeated
        // until the string no longer changes.
        do {
            $before_round = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_round !== $str);

        return self::fix_simple_utf8($str);
    }
12809
12810
    /**
12811
     * Decodes a UTF-8 string to ISO-8859-1.
12812
     *
12813
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
12814
     *
12815
     * @param string $str             <p>The input string.</p>
12816
     * @param bool   $keep_utf8_chars
12817
     *
12818
     * @psalm-pure
12819
     *
12820
     * @return string
12821
     */
12822
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
12823
    {
        // Rewrites $str in place, byte by byte: $i is the read position, $j the write position.
        // The decoded result is the first $j bytes of the rewritten string.
12824 14
        if ($str === '') {
12825 6
            return '';
12826
        }
12827
12828
        // save for later comparison
12829 14
        $str_backup = $str;
12830 14
        $len = \strlen($str);
12831
        // lazy-load the lookup tables on first use
        // NOTE(review): $ORD presumably maps a single byte to its integer value and $CHR an integer
        // (0-255) to a single byte - the tables themselves are not visible here, please confirm.
12832 14
        if (self::$ORD === null) {
12833
            self::$ORD = self::getData('ord');
12834
        }
12835
12836 14
        if (self::$CHR === null) {
12837
            self::$CHR = self::getData('chr');
12838
        }
12839
        // placeholder for everything that cannot be written as a single byte
12840 14
        $no_char_found = '?';
12841 14
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // dispatch on the high nibble of the current byte
12842 14
            switch ($str[$i] & "\xF0") {
                // lead byte 0xC0-0xDF: 2-byte sequence
12843 14
                case "\xC0":
12844 13
                case "\xD0":
                    // combine the low 5 bits of the lead byte with the low 6 bits of the next byte
                    // (the next byte is consumed via "++$i")
12845 13
                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    // only values below 256 are written via the $CHR table, everything else becomes "?"
12846 13
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;
12847
12848 13
                    break;
12849
                // lead byte 0xF0-0xFF: skip one extra byte, then continue like the 0xE0 case
12850 13
                case "\xF0":
12851
                    ++$i;
12852
12853
                // no break
12854
                // lead byte 0xE0-0xEF: write "?" and skip the two following bytes
12855 13
                case "\xE0":
12856 11
                    $str[$j] = $no_char_found;
12857 11
                    $i += 2;
12858
12859 11
                    break;
12860
                // every other byte is copied unchanged
12861
                default:
12862 12
                    $str[$j] = $str[$i];
12863
            }
12864
        }
12865
        // cut off the stale bytes behind the write position
12866
        /** @var false|string $return - needed for PhpStan (stubs error) */
12867 14
        $return = \substr($str, 0, $j);
12868 14
        if ($return === false) {
12869
            $return = '';
12870
        }
12871
        // With $keep_utf8_chars the original input is returned instead, whenever the decoded
        // string is (per self::strlen()) at least as long as the original one.
12872
        if (
12873 14
            $keep_utf8_chars
12874
            &&
12875 14
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
12876
        ) {
12877 2
            return $str_backup;
12878
        }
12879
12880 14
        return $return;
12881
    }
12882
12883
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the conversion failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // "utf8_encode()" is deprecated as of PHP 8.2, so the mbstring conversion is preferred
        // and the old function is only used when "mb_convert_encoding()" is not available
        if (\function_exists('mb_convert_encoding')) {
            /** @var false|string $encoded - the polyfill maybe return false */
            $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
        } else {
            /** @var false|string $encoded - the polyfill maybe return false */
            $encoded = \utf8_encode($str);
        }

        if (!\is_string($encoded)) {
            return '';
        }

        return $encoded;
    }
12909
12910
    /**
     * Returns the class-level whitespace table (self::$WHITESPACE_TABLE).
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12925
12926
    /**
     * Limit the number of words in a string.
     *
     * The string is returned unchanged when it contains no more than $limit words
     * (or when the pattern does not match at all); otherwise the first $limit words
     * are returned, right-trimmed, followed by $str_add_on.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer, values below 1 result in an empty string.</p>
     * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // optional leading whitespace, then 1..$limit runs of "non-whitespace + trailing whitespace"
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        $head = $found[0] ?? null;
        if ($head === null) {
            return $str;
        }

        // the match already covers the whole input, so nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
12960
12961
    /**
     * Wraps a string to a given number of characters
     *
     * The input is translated into a single-byte "mask" ("?" per character, " " per space,
     * "#" per existing break), the native wordwrap() is applied to that mask and the
     * resulting break positions are then mapped back onto the real characters.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column, or an empty string
     *                if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach (\explode($break, $str) as $segment_no => $segment) {
            // every segment after the first one was preceded by a break in the input
            if ($segment_no !== 0) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $result = '';
        $char_index = 0; // position in $chars
        $mask_index = -1; // position in $mask
        $break_pos = -1; // position of the current "#" in $mask

        while (true) {
            $break_pos = \mb_strpos($mask, '#', $break_pos + 1);
            if ($break_pos === false) {
                break;
            }

            // copy everything in front of the current break
            for (++$mask_index; $mask_index < $break_pos; ++$mask_index) {
                if (isset($chars[$char_index])) {
                    $result .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // the "#" replaced an original break or a space: consume that character,
            // otherwise the "#" was inserted and no character is dropped
            if (
                $chars[$char_index] === $break
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index]);
                ++$char_index;
            }

            $result .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // append whatever was not consumed above
        return $result . \implode('', $chars);
    }
13053
13054
    /**
     * Splits the string by "$delimiter" (by default: any newline) and word-wraps
     * every resulting line on its own via self::wordwrap().
     *
     * The wrapped lines are joined again with "$delimiter" ("\n" if no delimiter was given).
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $tail = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $tail;
    }
13107
13108
    /**
     * Returns the class-level list of Unicode White Space characters (self::$WHITESPACE).
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
13120
13121
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * If PCRE with UTF-8 support is available, the check is delegated to a "/u" pattern match;
     * otherwise the bytes are validated one by one.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary looking input that is detected as UTF-16 / UTF-32 is rejected
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // first octets of multi-octet sequences:
        // [bit mask, expected bits, payload mask, number of continuation octets]
        //
        // 5 and 6 octet sequences are illegal, but they are still consumed until the end
        // of the sequence, so that the later error handling code catches them.
        $lead_octets = [
            [0xE0, 0xC0, 0x1F, 1],
            [0xF0, 0xE0, 0x0F, 2],
            [0xF8, 0xF0, 0x07, 3],
            [0xFC, 0xF8, 0x03, 4],
            [0xFE, 0xFC, 0x01, 5],
        ];

        $pending = 0; // expected number of octets until the next UTF8 character sequence begins
        $code_point = 0; // Unicode character that is assembled from the current sequence
        $sequence_length = 1; // expected number of octets in the current sequence

        $byte_count = \strlen($str);
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($pending !== 0) {
                // inside of a multi-octet sequence only a continuation octet is legal
                if ((0xC0 & $octet) !== 0x80) {
                    // Incomplete multi-octet sequence.
                    return false;
                }

                --$pending;
                $code_point |= ($octet & 0x0000003F) << ($pending * 6);

                if ($pending !== 0) {
                    continue;
                }

                // End of the multi-octet sequence: check for illegal sequences and code points.
                if (
                    // From Unicode 3.1, non-shortest form is illegal
                    ($sequence_length === 2 && $code_point < 0x0080)
                    ||
                    ($sequence_length === 3 && $code_point < 0x0800)
                    ||
                    ($sequence_length === 4 && $code_point < 0x10000)
                    ||
                    ($sequence_length > 4)
                    ||
                    // From Unicode 3.2, surrogate characters are illegal.
                    (($code_point & 0xFFFFF800) === 0xD800)
                    ||
                    // Code points outside the Unicode range are illegal.
                    ($code_point > 0x10FFFF)
                ) {
                    return false;
                }

                // start over with the next character
                $code_point = 0;
                $sequence_length = 1;

                continue;
            }

            // US-ASCII, pass straight through.
            if ((0x80 & $octet) === 0) {
                $sequence_length = 1;

                continue;
            }

            $is_lead_octet = false;
            foreach ($lead_octets as list($mask, $marker, $payload_mask, $continuation_octets)) {
                if (($mask & $octet) === $marker) {
                    $code_point = ($octet & $payload_mask) << ($continuation_octets * 6);
                    $pending = $continuation_octets;
                    $sequence_length = $continuation_octets + 1;
                    $is_lead_octet = true;

                    break;
                }
            }

            // Current octet is neither in the US-ASCII range nor a legal first
            // octet of a multi-octet sequence.
            if (!$is_lead_octet) {
                return false;
            }
        }

        // a sequence that is still open at the end of the string is invalid
        return $pending === 0;
    }
13275
13276
    /**
     * Replaces the entries of the "common case fold" table (and optionally of the
     * "caseFolding_full" data file) in the given string.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $use_lowercase
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            // loaded only once per request
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is replaced by index 1 for lowercase, and the other way around otherwise
        if ($use_lowercase) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
13329
13330
    /**
     * get data from "/data/*.php"
     *
     * Includes the file "<directory of this class>/data/<$file>.php" and returns its return value.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
13348
13349
    /**
     * Fills the emoji caches (keys, values and reversible placeholder keys) once.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function initEmojiData()
    {
        // already initialized by a previous call
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * sort by key length, longest keys first
         *
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // one placeholder per key, built from the crc32 checksum of the key and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
13386
13387
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>false if the constant "MB_OVERLOAD_STRING" does not exist, otherwise whether
     *              that bit is set in the INI directive "mbstring.func_overload".</p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection DeprecatedIniOptionsInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($func_overload & \MB_OVERLOAD_STRING);
    }
13407
13408
    /**
     * Filters a list of strings and returns the remaining values with new (consecutive) keys.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Remove values that are empty after trim().</p>
     * @param int|null $remove_short_values <p>
     *                                      Remove values with this many characters or fewer (mb_strlen()),
     *                                      null disables the check.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // the values are only read, so they are iterated by value (no dangling reference afterwards)
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
13449
13450
    /**
     * rxClass
     *
     * Builds a regular expression fragment that matches the characters of $s: a bracketed
     * character class, or a non-capturing alternation "(?:...|...)" if some element returned
     * by self::str_split() cannot be put into the class. The result is cached per request.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Character class content that is put in front of the characters of $s.</p>
     *
     * @return string
     *
     * @psalm-pure
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 collects the content of the character class, further entries are alternatives
        $class_array = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen is only a literal at the beginning of a character class
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // fewer than three bytes: escape the regex special characters
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // a single character of three or more bytes is added as it is
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        // compare against '' instead of using truthiness, so that the class "0" is bracketed too
        if ($class_array[0] !== '') {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
13505
13506
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The string is split by $delimiter and the first character of every part is converted
     * via self::ucfirst(), except for parts that are a known lowercase name particle
     * (e.g. "von", "de") or that start with a known prefix (e.g. "mc", "d'").
     *
     * @param string $names     <p>The names to capitalize.</p>
     * @param string $delimiter <p>The delimiter between the name parts, an empty delimiter results in an empty string.</p>
     * @param string $encoding  <p>The encoding that is passed on to self::ucfirst().</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // "explode()" cannot split by an empty delimiter (it throws a ValueError since PHP 8)
        if ($delimiter === '') {
            return '';
        }

        // init
        $name_helper_array = \explode($delimiter, $names);

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // beginnings that keep a part untouched: for the "-" delimiter the special names
        // count as beginnings too (the list does not depend on the single part)
        $untouched_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $untouched_beginnings = \array_merge($special_cases['names'], $untouched_beginnings);
        }

        foreach ($name_helper_array as &$name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $keep_untouched = false;
            foreach ($untouched_beginnings as $beginning) {
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    $keep_untouched = true;

                    break;
                }
            }

            if ($keep_untouched) {
                continue;
            }

            $name = self::ucfirst($name, $encoding);
        }
        // break the reference to the last element
        unset($name);

        return \implode($delimiter, $name_helper_array);
    }
13606
13607
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and all non-spacing marks ("\p{Mn}") are removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>An empty string if the normalization failed, otherwise the result of preg_replace().</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        if ($decomposed === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
13631
13632
    /**
     * Converts a single byte into its UTF-8 representation: either via the
     * "win1252_to_utf8" table or, if the byte is not part of that table,
     * as a two-byte sequence that is built from the value of the byte.
     *
     * @param int|string $input <p>The byte to convert, used as key for the "ord" table.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // The division yields a float for most values, and a fractional float as array key
            // is deprecated since PHP 8.1, so the value is truncated explicitly.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // bitwise string operations: keep the lower six bits and mark the byte as continuation byte
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
13670
13671
    /**
     * Converts "%uXXXX" sequences (three or four hex digits) into
     * hexadecimal HTML entities ("&#xXXXX;"), everything else is kept as it is.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        // cheap check first: without "%u" there is nothing to convert
        if (\strpos($str, '%u') === false) {
            return $str;
        }

        $unicode_escape = '/%u([0-9a-fA-F]{3,4})/';

        if (\preg_match($unicode_escape, $str) !== 1) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape, '&#x\\1;', $str);
    }
13693
}
13694