Passed
Push — master ( 5530eb...39c1dc )
by Lars
04:51 queued 12s
created

UTF8   F

Complexity

Total Complexity 1748

Size/Duplication

Total Lines 13677
Duplicated Lines 0 %

Test Coverage

Coverage 80.88%

Importance

Changes 109
Bugs 52 Features 6
Metric Value
eloc 4228
c 109
b 52
f 6
dl 0
loc 13677
ccs 3101
cts 3834
cp 0.8088
rs 0.8
wmc 1748

272 Methods

Rating   Name   Duplication   Size   Complexity  
A encode_mimeheader() 0 26 5
F extract_text() 0 175 34
B chr_to_decimal() 0 38 8
A add_bom_to_string() 0 7 2
A count_chars() 0 11 1
A ctype_loaded() 0 3 1
D chr() 0 107 19
A chunk_split() 0 3 1
A css_identifier() 0 55 6
A css_stripe_media_queries() 0 6 1
A clean() 0 47 6
A __construct() 0 2 1
B between() 0 48 8
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A char_at() 0 7 2
A chars() 0 4 1
A chr_size_list() 0 17 3
A checkForSupport() 0 46 4
A collapse_whitespace() 0 7 2
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A array_change_key_case() 0 23 5
A emoji_decode() 0 21 3
A decode_mimeheader() 0 8 3
A emoji_encode() 0 21 3
A decimal_to_chr() 0 5 1
F encode() 0 144 37
A chr_to_hex() 0 11 3
A emoji_from_country_code() 0 17 3
A str_substr_after_first_separator() 0 28 6
A file_has_bom() 0 8 2
A max() 0 14 3
B str_camelize() 0 74 10
A parse_str() 0 18 4
A filter_input() 0 16 3
A str_contains() 0 15 3
B str_to_lines() 0 28 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A is_bom() 0 10 3
A is_hexadecimal() 0 7 2
A get_unique_string() 0 21 3
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 25 6
A has_uppercase() 0 7 2
A remove_left() 0 28 4
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
C utf8_decode() 0 59 13
A ltrim() 0 26 5
A is_utf8() 0 13 4
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 70 14
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A html_escape() 0 6 1
A string() 0 18 4
B str_obfuscate() 0 47 8
D normalize_encoding() 0 147 16
B rxClass() 0 44 8
B get_file_type() 0 60 7
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 170 7
D is_utf16() 0 76 17
C filter() 0 59 14
A normalize_whitespace() 0 11 1
A str_starts_with() 0 16 4
A str_humanize() 0 15 1
A is_html() 0 14 2
C substr_count_in_byte() 0 55 15
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 26 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 35 5
A getData() 0 6 1
B strtolower() 0 58 10
A urldecode() 0 35 4
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 123 27
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
A is_alpha() 0 7 2
C str_titleize() 0 69 12
A str_split_array() 0 17 2
B get_random_string() 0 54 10
A ws() 0 3 1
A str_replace_first() 0 20 2
A fix_utf8() 0 30 4
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A first_char() 0 14 4
A to_boolean() 0 35 5
C stristr() 0 79 17
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A trim() 0 26 5
A is_serialized() 0 11 3
A str_upper_camelize() 0 8 1
A is_uppercase() 0 7 2
A substr_compare() 0 33 6
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 12 3
A str_ireplace() 0 31 5
A str_replace_ending() 0 24 6
A string_has_bom() 0 9 3
B strtr() 0 41 11
B str_contains_all() 0 22 9
A is_ascii() 0 3 1
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 71 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A rawurldecode() 0 35 4
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 14 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 14
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A is_blank() 0 7 2
A str_replace() 0 18 1
A substr_iright() 0 15 4
D getCharDirection() 0 104 117
A htmlspecialchars() 0 15 3
A replace() 0 11 2
A filter_var_array() 0 15 2
A to_iso8859() 0 16 4
A has_whitespace() 0 7 2
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 134 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
F strrpos() 0 136 31
A remove_right() 0 25 4
A remove_html_breaks() 0 3 1
A showSupport() 0 16 3
A remove_invisible_characters() 0 11 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_substr_before_last_separator() 0 31 6
B is_binary() 0 39 10
A intlChar_loaded() 0 3 1
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
A tabs_to_spaces() 0 11 3
B is_url() 0 40 7
A finfo_loaded() 0 3 1
B str_truncate() 0 43 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 16 4
A fits_inside() 0 3 1
A to_ascii() 0 6 1
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 12 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A html_stripe_empty_tags() 0 6 1
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
A json_loaded() 0 3 1
B str_snakeize() 0 57 6
A is_lowercase() 0 7 2
A str_sort() 0 16 3
A to_utf8() 0 15 3
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 51 11
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A swapCase() 0 17 4
A filter_var() 0 15 2
A substr_ileft() 0 15 4
A is_empty() 0 3 1
B html_encode() 0 54 11
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
C is_utf32() 0 71 16
C ord() 0 68 16
B to_string() 0 27 8
A is_alphanumeric() 0 7 2
A strtonatfold() 0 11 2
A json_decode() 0 17 3
C strcspn() 0 48 12
A fix_simple_utf8() 0 32 5
B is_json() 0 26 8
A is_printable() 0 3 1
A fixStrCaseHelper() 0 41 5
A int_to_hex() 0 7 2
C str_split_pattern() 0 54 13
D strstr() 0 107 21
A has_lowercase() 0 7 2
A json_encode() 0 13 3
A str_isubstr_first() 0 25 4
A is_base64() 0 17 5
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A hex_to_int() 0 14 3
A hex_to_chr() 0 4 1
A htmlentities() 0 28 3
A str_substr_before_first_separator() 0 32 6
F substr() 0 137 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A filter_input_array() 0 15 3
A str_insert() 0 28 4
A getSupportInfo() 0 13 3
A replace_diamond_question_mark() 0 43 5
D is_utf8_string() 0 133 28
B str_delimit() 0 31 8
A to_utf8_convert_helper() 0 28 5
B strtoupper() 0 58 10
A min() 0 14 3
A is_punctuation() 0 3 1
C html_entity_decode() 0 58 13
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 28 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 151 33
A str_shuffle() 0 33 6
A strcmp() 0 11 2
C file_get_contents() 0 61 12
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * Bom => Byte-Length
14
     *
15
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
16
     *
17
     * @var array<string, int>
18
     */
19
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // The UTF-8 BOM bytes mis-read as WINDOWS-1252 ("ï»¿") and stored as UTF-8 again.
        // Written with byte escapes so the key cannot be lost or altered by editors/tooling.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
31
32
    /**
33
     * Numeric code point => UTF-8 Character
34
     *
35
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
36
     *
37
     * @var array<int, string>
38
     */
39
    private static $WHITESPACE = [
40
        // NULL Byte
41
        0 => "\x0",
42
        // Tab
43
        9 => "\x9",
44
        // New Line
45
        10 => "\xa",
46
        // Vertical Tab
47
        11 => "\xb",
48
        // Carriage Return
49
        13 => "\xd",
50
        // Ordinary Space
51
        32 => "\x20",
52
        // NO-BREAK SPACE
53
        160 => "\xc2\xa0",
54
        // OGHAM SPACE MARK
55
        5760 => "\xe1\x9a\x80",
56
        // MONGOLIAN VOWEL SEPARATOR
57
        6158 => "\xe1\xa0\x8e",
58
        // EN QUAD
59
        8192 => "\xe2\x80\x80",
60
        // EM QUAD
61
        8193 => "\xe2\x80\x81",
62
        // EN SPACE
63
        8194 => "\xe2\x80\x82",
64
        // EM SPACE
65
        8195 => "\xe2\x80\x83",
66
        // THREE-PER-EM SPACE
67
        8196 => "\xe2\x80\x84",
68
        // FOUR-PER-EM SPACE
69
        8197 => "\xe2\x80\x85",
70
        // SIX-PER-EM SPACE
71
        8198 => "\xe2\x80\x86",
72
        // FIGURE SPACE
73
        8199 => "\xe2\x80\x87",
74
        // PUNCTUATION SPACE
75
        8200 => "\xe2\x80\x88",
76
        // THIN SPACE
77
        8201 => "\xe2\x80\x89",
78
        // HAIR SPACE
79
        8202 => "\xe2\x80\x8a",
80
        // LINE SEPARATOR
81
        8232 => "\xe2\x80\xa8",
82
        // PARAGRAPH SEPARATOR
83
        8233 => "\xe2\x80\xa9",
84
        // NARROW NO-BREAK SPACE
85
        8239 => "\xe2\x80\xaf",
86
        // MEDIUM MATHEMATICAL SPACE
87
        8287 => "\xe2\x81\x9f",
88
        // HALFWIDTH HANGUL FILLER
89
        65440 => "\xef\xbe\xa0",
90
        // IDEOGRAPHIC SPACE
91
        12288 => "\xe3\x80\x80",
92
    ];
93
94
    /**
95
     * @var array<string, string>
96
     */
97
    private static $WHITESPACE_TABLE = [
98
        'SPACE'                     => "\x20",
99
        'NO-BREAK SPACE'            => "\xc2\xa0",
100
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
101
        'EN QUAD'                   => "\xe2\x80\x80",
102
        'EM QUAD'                   => "\xe2\x80\x81",
103
        'EN SPACE'                  => "\xe2\x80\x82",
104
        'EM SPACE'                  => "\xe2\x80\x83",
105
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
106
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
107
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
108
        'FIGURE SPACE'              => "\xe2\x80\x87",
109
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
110
        'THIN SPACE'                => "\xe2\x80\x89",
111
        'HAIR SPACE'                => "\xe2\x80\x8a",
112
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
113
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
114
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
115
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
116
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
117
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
118
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
119
    ];
120
121
    /**
122
     * @var array
123
     *
124
     * @phpstan-var array{upper: string[], lower: string[]}
125
     */
126
    private static $COMMON_CASE_FOLD = [
127
        'upper' => [
128
            'µ',
129
            'ſ',
130
            "\xCD\x85",
131
            'ς',
132
            'ẞ',
133
            "\xCF\x90",
134
            "\xCF\x91",
135
            "\xCF\x95",
136
            "\xCF\x96",
137
            "\xCF\xB0",
138
            "\xCF\xB1",
139
            "\xCF\xB5",
140
            "\xE1\xBA\x9B",
141
            "\xE1\xBE\xBE",
142
        ],
143
        'lower' => [
144
            'μ',
145
            's',
146
            'ι',
147
            'σ',
148
            'ß',
149
            'β',
150
            'θ',
151
            'φ',
152
            'π',
153
            'κ',
154
            'ρ',
155
            'ε',
156
            "\xE1\xB9\xA1",
157
            'ι',
158
        ],
159
    ];
160
161
    /**
162
     * @var array
163
     *
164
     * @phpstan-var array<string, mixed>
165
     */
166
    private static $SUPPORT = [];
167
168
    /**
169
     * @var string[]|null
170
     *
171
     * @phpstan-var array<string, string>|null
172
     */
173
    private static $BROKEN_UTF8_FIX;
174
175
    /**
176
     * @var string[]|null
177
     *
178
     * @phpstan-var array<int, string>|null
179
     */
180
    private static $WIN1252_TO_UTF8;
181
182
    /**
183
     * @var string[]|null
184
     *
185
     * @phpstan-var array<int ,string>|null
186
     */
187
    private static $INTL_TRANSLITERATOR_LIST;
188
189
    /**
190
     * @var string[]|null
191
     *
192
     * @phpstan-var array<string>|null
193
     */
194
    private static $ENCODINGS;
195
196
    /**
197
     * @var int[]|null
198
     *
199
     * @phpstan-var array<string ,int>|null
200
     */
201
    private static $ORD;
202
203
    /**
204
     * @var string[]|null
205
     *
206
     * @phpstan-var array<string, string>|null
207
     */
208
    private static $EMOJI;
209
210
    /**
211
     * @var string[]|null
212
     *
213
     * @phpstan-var array<string>|null
214
     */
215
    private static $EMOJI_VALUES_CACHE;
216
217
    /**
218
     * @var string[]|null
219
     *
220
     * @phpstan-var array<string>|null
221
     */
222
    private static $EMOJI_KEYS_CACHE;
223
224
    /**
225
     * @var string[]|null
226
     *
227
     * @phpstan-var array<string>|null
228
     */
229
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
230
231
    /**
232
     * @var string[]|null
233
     *
234
     * @phpstan-var array<int, string>|null
235
     */
236
    private static $CHR;
237
238
    /**
239
     * __construct()
240
     */
241 34
    public function __construct()
    {
        // Intentionally empty: the constructor performs no work.
    }
244
245
    /**
246
     * Return the character at the specified position: $str[1] like functionality.
247
     *
248
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
249
     *
250
     * @param string $str      <p>A UTF-8 string.</p>
251
     * @param int    $pos      <p>The position of character to return.</p>
252
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
253
     *
254
     * @psalm-pure
255
     *
256
     * @return string
257
     *                <p>Single multi-byte character.</p>
258
     */
259 3
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // Nothing can be picked from an empty string or from a negative position.
        if ($pos < 0 || $str === '') {
            return '';
        }

        // UTF-8 takes the direct mb_substr() route (no explicit encoding argument is passed);
        // every other encoding is delegated to the encoding-aware self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
271
272
    /**
273
     * Prepends UTF-8 BOM character to the string and returns the whole string.
274
     *
275
     * INFO: If BOM already existed there, the Input string is returned.
276
     *
277
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
278
     *
279
     * @param string $str <p>The input string.</p>
280
     *
281
     * @psalm-pure
282
     *
283
     * @return string
284
     *                <p>The output string that contains BOM.</p>
285
     */
286 2
    public static function add_bom_to_string(string $str): string
    {
        // A string that self::string_has_bom() already accepts is handed back untouched.
        if (self::string_has_bom($str)) {
            return $str;
        }

        // Otherwise prefix it with the UTF-8 BOM supplied by self::bom().
        return self::bom() . $str;
    }
294
295
    /**
296
     * Changes all keys in an array.
297
     *
298
     * @param array<string, mixed> $array    <p>The array to work on</p>
299
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
300
     *                                       or <strong>CASE_LOWER</strong> (default)</p>
301
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
302
     *
303
     * @psalm-pure
304
     *
305
     * @return array<string, mixed>
306
     *                  <p>An array with its keys lower- or uppercased.</p>
307
     */
308 2
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Only CASE_UPPER selects upper-casing; CASE_LOWER and any unknown
        // value both result in lower-cased keys.
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: the values are only copied into the result, so a
        // by-reference loop variable is unnecessary and would leave a dangling reference.
        foreach ($array as $key => $value) {
            // PHP stores numeric string keys as integers; cast back to string so the
            // case helpers always receive a string (this file declares strict_types).
            $key = (string) $key;

            $key = $to_upper
                ? self::strtoupper($key, $encoding)
                : self::strtolower($key, $encoding);

            // Keys that collide after case conversion: the later entry wins.
            $return[$key] = $value;
        }

        return $return;
    }
332
333
    /**
334
     * Returns the substring between $start and $end, if found, or an empty
335
     * string. An optional offset may be supplied from which to begin the
336
     * search for the start string.
337
     *
338
     * @param string $str
339
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
340
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
341
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
342
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
343
     *
344
     * @psalm-pure
345
     *
346
     * @return string
347
     */
348 16
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // The literal 'UTF-8' uses the mb_* functions directly; any other value is
        // normalized first and then handled by the encoding-aware self::* helpers.
        // The decision is taken before normalization, so an alias that normalizes
        // to 'UTF-8' still goes through the self::* helpers.
        $use_mb = $encoding === 'UTF-8';
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Locate the opening delimiter, searching from $offset.
        $from = $use_mb
            ? \mb_strpos($str, $start, $offset)
            : self::strpos($str, $start, $offset, $encoding);
        if ($from === false) {
            return '';
        }

        // The wanted content begins right after the opening delimiter.
        $start_length = $use_mb
            ? \mb_strlen($start)
            : self::strlen($start, $encoding);
        $content_begin = $from + (int) $start_length;

        // Locate the closing delimiter, searching from the content start.
        $to = $use_mb
            ? \mb_strpos($str, $end, $content_begin)
            : self::strpos($str, $end, $content_begin, $encoding);

        // No closing delimiter, or nothing between the two delimiters.
        if ($to === false || $to === $content_begin) {
            return '';
        }

        $content_length = $to - $content_begin;

        if ($use_mb) {
            return (string) \mb_substr($str, $content_begin, $content_length);
        }

        return (string) self::substr($str, $content_begin, $content_length, $encoding);
    }
398
399
    /**
400
     * Convert binary into a string.
401
     *
402
     * INFO: opposite to UTF8::str_to_binary()
403
     *
404
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
405
     *
406
     * @param string $bin 1|0
407
     *
408
     * @psalm-pure
409
     *
410
     * @return string
411
     */
412 2
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        // Keep only binary digits; anything else carries no bit information.
        $bits = (string) \preg_replace('/[^01]/', '', (string) $bin);

        // Input without a single set bit (e.g. "0") yields an empty string,
        // matching the previous behaviour for a zero value.
        if (\strpos($bits, '1') === false) {
            return '';
        }

        // Left-pad to a whole number of octets so that the rightmost 8 bits
        // always form the last byte.
        $remainder = \strlen($bits) % 8;
        if ($remainder !== 0) {
            $bits = \str_repeat('0', 8 - $remainder) . $bits;
        }

        // Convert octet by octet. Going through one big number (base_convert())
        // drops leading zero bits and loses precision on long inputs, and
        // pack('H*') pads an odd-length hex string on the wrong side.
        $str = '';
        foreach (\str_split($bits, 8) as $octet) {
            $str .= \chr((int) \bindec($octet));
        }

        return $str;
    }
425
426
    /**
427
     * Returns the UTF-8 Byte Order Mark Character.
428
     *
429
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
430
     *
431
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
432
     *
433
     * @psalm-pure
434
     *
435
     * @return string
436
     *                <p>UTF-8 Byte Order Mark.</p>
437
     */
438 4
    public static function bom(): string
    {
        // The three bytes EF BB BF, i.e. the UTF-8 byte order mark.
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
442
443
    /**
444
     * @alias of UTF8::chr_map()
445
     *
446
     * @param callable $callback
447
     * @param string   $str
448
     *
449
     * @psalm-pure
450
     *
451
     * @return string[]
452
     *
453
     * @see   UTF8::chr_map()
454
     */
455 2
    public static function callback($callback, string $str): array
    {
        // Pure alias: both arguments are forwarded unchanged to self::chr_map().
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
459
460
    /**
461
     * Returns the character at $index, with indexes starting at 0.
462
     *
463
     * @param string $str      <p>The input string.</p>
464
     * @param int    $index    <p>Position of the character.</p>
465
     * @param string $encoding [optional] <p>Default is UTF-8</p>
466
     *
467
     * @psalm-pure
468
     *
469
     * @return string
470
     *                <p>The character at $index.</p>
471
     */
472 9
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // UTF-8 goes straight to mb_substr() (no explicit encoding argument is passed);
        // other encodings are delegated to the encoding-aware self::substr().
        // Unlike access(), no guard is applied to $index here.
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
480
481
    /**
482
     * Returns an array consisting of the characters in the string.
483
     *
484
     * @param string $str <p>The input string.</p>
485
     *
486
     * @psalm-pure
487
     *
488
     * @return string[]
489
     *                  <p>An array of chars.</p>
490
     */
491 4
    public static function chars(string $str): array
    {
        // Delegates to self::str_split(), called with only the input string.
        /** @var string[] $chars */
        $chars = self::str_split($str);

        return $chars;
    }
496
497
    /**
498
     * This method will auto-detect your server environment for UTF-8 support.
499
     *
500
     * @return true|null
501
     *
502
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
503
     */
504 4
    public static function checkForSupport()
    {
        // The detection runs once; later calls return null immediately.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();

        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // With mbstring present, switch its internal and regex encoding to UTF-8.
            \mb_internal_encoding('UTF-8');
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            // The polyfill gets the same UTF-8 internal encoding as native mbstring.
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
551
552
    /**
553
     * Generates a UTF-8 encoded character from the given code point.
554
     *
555
     * INFO: opposite to UTF8::ord()
556
     *
557
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
558
     *
559
     * @param int    $code_point <p>The code point for which to generate a character.</p>
560
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
561
     *
562
     * @psalm-pure
563
     *
564
     * @return string|null
565
     *                     <p>Multi-byte character, returns null on failure or empty input.</p>
566
     */
567 21
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // Per-request memo of already generated characters, keyed by "<code point>_<encoding>".
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Reject anything that is not a positive integer inside the Unicode
        // code space (U+0001 .. U+10FFFF). Larger values cannot be encoded and
        // would index outside the byte table in the manual fallback below.
        if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Single-byte (ASCII) range only: U+0080 already needs two bytes in
        // UTF-8, so it must NOT be served from the raw byte table.
        if ($code_point <= 0x7F) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $chr = \IntlChar::chr($code_point);

            // IntlChar::chr() reports failure with null; pass that on as-is
            // instead of trying to re-encode it.
            if ($chr === null) {
                return null;
            }

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // Assemble the UTF-8 byte sequence by hand; self::$CHR is used as an
        // int => single byte lookup table for the lead and continuation bytes.
        if ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
675
676
    /**
677
     * Applies callback to all characters of a string.
678
     *
679
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
680
     *
681
     * @param callable $callback <p>The callback function.</p>
682
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
683
     *
684
     * @psalm-pure
685
     *
686
     * @return string[]
687
     *                  <p>The outcome of the callback, as array.</p>
688
     */
689 2
    public static function chr_map($callback, string $str): array
    {
        // Split the string via self::str_split(), then run the callback over each piece.
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
696
697
    /**
698
     * Generates an array of byte length of each character of a Unicode string.
699
     *
700
     * 1 byte => U+0000  - U+007F
701
     * 2 byte => U+0080  - U+07FF
702
     * 3 byte => U+0800  - U+FFFF
703
     * 4 byte => U+10000 - U+10FFFF
704
     *
705
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
706
     *
707
     * @param string $str <p>The original unicode string.</p>
708
     *
709
     * @psalm-pure
710
     *
711
     * @return int[]
712
     *               <p>An array of byte lengths of each character.</p>
713
     */
714 4
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // Pick the byte counter first. When mbstring function overloading is
        // flagged, "mb_" is available, so mb_strlen() with the 8-BIT "CP850"
        // charset is used; otherwise the native strlen() is used.
        $byte_counter = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            }
            : '\strlen';

        // One length per element returned by self::str_split().
        return \array_map($byte_counter, self::str_split($str));
    }
732
733
    /**
     * Get a decimal code representation of a specific character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 167 (0xa7)</code>
     *
     * Only the first character of $char is evaluated: the iconv path unpacks a
     * single 32-bit value and the fallback path starts decoding at byte 0.
     *
     * @param string $char <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point of the character, or 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // There is nothing to decode in an empty string: without this guard
        // "unpack()" would be called on empty data (iconv path) or "$char[0]"
        // would read a missing offset (fallback path).
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                /** @phpstan-ignore-next-line - "unpack": only false if the format string contains errors */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            // "?? ''" keeps a truncated sequence from reading a missing string offset
            $code = ($code << 6) + (self::ord($char[$i - 1] ?? '') & ~0x80);
        }

        return $code;
    }
785
786
    /**
     * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional] <p>Prefix handed to UTF8::int_to_hex(), default: "U+".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the HTML entity of the NUL char is looked up as an empty string
        $lookup = $char === '&#0;' ? '' : $char;

        $code_point = self::ord((string) $lookup);

        return self::int_to_hex($code_point, $prefix);
    }
811
812
    /**
     * Splits a string into chunks of $chunk_length characters and joins them with $end.
     *
     * The separator is placed between the chunks only, nothing is appended after the last one.
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
830
831
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps run in this order: diamond question mark, invisible characters,
     * whitespace, MS Word chars, BOM.
     *
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        the UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
     *                                                        Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
     *                                                        in combination with $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove the
     *                                                        diamond question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
     *                                                        invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Forwarded to
     *                                                        UTF8::remove_invisible_characters(); presumably true
     *                                                        also removes invisible url encoded characters
     *                                                        e.g.: "%0B"<br> WARNING: maybe contains
     *                                                        false-positives e.g. aa%0Baa -> aaaa.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 encoded string.</p>
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // group 1 matches runs of well-formed sequences, the other groups match stray bytes
        $utf8_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';

        // only group 1 is written back, so every stray byte is dropped
        $cleaned = (string) \preg_replace($utf8_pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        return $remove_bom ? self::remove_bom($cleaned) : $cleaned;
    }
910
911
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * The input is passed through UTF8::fix_simple_utf8() and then through UTF8::clean()
     * with a fixed set of options (see the inline comments below).
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // "clean()" always removes all none UTF-8 symbols and (by its default)
        // invisible characters (e.g. "\0"), the rest is configured here
        return self::clean(
            $fixed,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // do not normalize MS Word chars
            true,  // ... but keep non-breaking-spaces
            true   // remove diamond question mark (�)
        );
    }
948
949
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
     *                                     default, code points will be returned as integers.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        An empty array for an empty or unsupported input.
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // a string is evaluated character by character, an array entry by entry
        $characters = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($characters) || $characters === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $characters);

        if (!$use_u_style) {
            return $code_points;
        }

        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1010
1011
    /**
     * Replaces every run of whitespace characters with a single space and trims
     * the result. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        // use "mb_ereg_replace()" when mbstring is available, the regex helper otherwise
        $condensed = self::$SUPPORT['mbstring'] === true
            ? (string) \mb_ereg_replace('[[:space:]]+', ' ', $str)
            : self::regex_replace($str, '[[:space:]]+', ' ');

        return \trim($condensed);
    }
1031
1032
    /**
     * Returns how often each character occurs in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Forwarded to UTF8::str_split(); presumably set it
     *                                        to false, if you don't want to use "mb_" functions there.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // one array entry per character ...
        $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // ... which are then tallied
        return \array_count_values($characters);
    }
1061
1062
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter     <p>Map of "search => replacement" strings which is applied before invalid characters are stripped.</p>
     * @param bool     $strip_tags <p>Set to true, to run the string through "strip_tags()".</p>
     * @param bool     $strtolower <p>Set to false, to skip the "strtolower()" call.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @phpstan-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: generate an identifier if nothing usable was given ...
        $identifier = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $identifier = \strip_tags($identifier);
        }

        if ($strtolower) {
            $identifier = \strtolower($identifier);
        }

        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $placeholder_count = 0;
        if (!isset($filter['__'])) {
            $identifier = \str_replace('__', '##', $identifier, $placeholder_count);
        }

        $identifier = \str_replace(\array_keys($filter), \array_values($filter), $identifier);

        // Replace temporary placeholder '##' with '__' only if the original
        // identifier contained '__'.
        if ($placeholder_count > 0) {
            $identifier = \str_replace('##', '__', $identifier);
        }

        // Characters which are kept by the pattern below:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - characters from U+00A1 up to U+FFFF
        // We strip out any character not in the above list.
        $identifier = (string) \preg_replace(
            '/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u',
            '',
            $identifier
        );

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $invalid_starts = ['/^[0-9]/', '/^(-[0-9])|^(--)/'];
        $valid_starts = ['_', '__'];
        $identifier = (string) \preg_replace($invalid_starts, $valid_starts, $identifier);

        return \trim($identifier, '-');
    }
1136
1137
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the pattern below is deleted,
     * the rest of the string is returned untouched.
     *
     * @param string $str <p>The css string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The css without the matched media-query blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        return (string) $without_media_queries;
    }
1154
1155
    /**
     * Reports whether the "ctype" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1169
1170
    /**
     * Converts an int value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_decimal()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int <p>The decimal code point.</p>
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character, or an empty string if the conversion did not return a string.</p>
     */
    public static function decimal_to_chr($int): string
    {
        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        //
        // NOTE(review): the "HTML-ENTITIES" pseudo-encoding is deprecated for
        // mbstring as of PHP 8.2, so this call needs a replacement at some point.
        $chr = \mb_convert_encoding('&#' . $int . ';', 'UTF-8', 'HTML-ENTITIES');

        // "mb_convert_encoding()" is not guaranteed to return a string,
        // so honour the declared return type explicitly
        return \is_string($chr) ? $chr : '';
    }
1191
1192
    /**
     * Decodes a MIME header field via "iconv_mime_decode()" (errors in the field are skipped,
     * because ICONV_MIME_DECODE_CONTINUE_ON_ERROR is used).
     *
     * @param string $str      <p>The encoded MIME header field.</p>
     * @param string $encoding [optional] <p>Set the charset for the decoded result.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // the two common charsets are used as they are, everything else gets normalized first
        $is_common_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_common_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1213
1214
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * Each letter is mapped onto the "regional indicator symbol" with the same
     * alphabet position (A => U+1F1E6, B => U+1F1E7, ...), lower-case input is accepted.
     *
     * EXAMPLE: <code>UTF8::emoji_from_country_code('DE'); // '🇩🇪'</code>
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
     *
     * @return string
     *                <p>Emoji or empty string on error (anything but exactly two ASCII letters).</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        if ($country_code_iso_3166_1 === '') {
            return '';
        }

        // Only exactly two ASCII letters can be mapped: the code is indexed by
        // byte below, so multi-byte characters or digits would end up as
        // unrelated code points instead of the documented empty string.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code = \strtoupper($country_code_iso_3166_1);

        $flagOffset = 0x1F1E6; // REGIONAL INDICATOR SYMBOL LETTER A
        $asciiOffset = 0x41; // "A"

        return (self::chr((self::ord($country_code[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1242
1243
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // pick the key list which matches the mapping that was used for the encoding
        $emoji_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $emoji_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1286
1287
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // pick the key list the emoji chars are replaced with
        $emoji_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $emoji_keys,
            $str
        );
    }
1330
1331
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the encoding of the input is
     *                                              auto-detected and a detected encoding replaces
     *                                              $from_encoding; if nothing can be detected the string is
     *                                              passed through UTF8::to_utf8() instead.<br>
     *                                              When false, $from_encoding is used as given.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $to_encoding is "JSON" and the string can not be json-encoded
     *
     * @return string
     *                <p>The converted string; the input is returned as it is if there is nothing to
     *                convert or if no conversion succeeded.</p>
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        // the pseudo-encodings (JSON, BASE64, HTML-ENTITIES) are handled first:
        // as a target they return directly, as a source the string gets decoded
        // and the real encoding is detected afterwards

        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // strict decoding returns false for invalid input: keep the given
            // string in that case (best effort) instead of carrying a boolean
            // through the rest of this method
            $str_decoded = \base64_decode($str, true);
            if ($str_decoded !== false) {
                $str = $str_decoded;
            }
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $from_encoding_auto_detected, $str, "\n\n");

        if ($from_encoding_auto_detected !== false) {
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            // explicit check instead of a truthiness test, so that a valid
            // result like "0" is not treated as a failed conversion
            if (\is_string($str_encoded) && $str_encoded !== '') {
                return $str_encoded;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $return = @\iconv($from_encoding, $to_encoding, $str);
        if ($return !== false) {
            return $return;
        }

        return $str;
    }
1506
1507
    /**
     * Encodes a string as a MIME header field value via "iconv_mime_encode()"
     * (the field name passed to it is always an empty string).
     *
     * @param string $str               <p>The string to encode.</p>
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding (the "scheme" preference).</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
     * @param int    $indent            [optional] <p>Set the max line length (the "line-length" preference).</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // the two common charsets are used as they are, everything else gets normalized first
        $is_common_from_charset = $from_charset === 'UTF-8' || $from_charset === 'CP850';
        if (!$is_common_from_charset) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        $is_common_to_charset = $to_charset === 'UTF-8' || $to_charset === 'CP850';
        if (!$is_common_to_charset) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $preferences);
    }
1550
1551
    /**
     * Creates an extract of a text: with a search string the extract is cut around the first
     * (case-insensitive) match, without one (or if nothing usable is found) it is taken from the start.
     *
     * Cut points are looked up at ' ' and '.' characters and skipped text is marked with
     * $replacer_for_skipped_text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 input uses the native "mb_*" functions directly, everything else the class wrappers.
        $is_utf8 = $encoding === 'UTF-8';

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // First ' ' / '.' at or after $offset. NOTE: min() over "int|false" yields false as soon as
        // one of both characters is missing, so the result is 0 in that case - this is relied upon below.
        $next_boundary = static function (int $offset) use ($str, $encoding, $is_utf8): int {
            if ($is_utf8) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        // Sub-string of $str in the current encoding (string, or false on failure).
        $slice = static function (int $start, int $count) use ($str, $encoding, $is_utf8) {
            if ($is_utf8) {
                return \mb_substr($str, $start, $count);
            }

            return self::substr($str, $start, $count, $encoding);
        };

        // ---------------------------------------------------------------
        // no search string: cut from the beginning of the text
        // ---------------------------------------------------------------

        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $string_length = $is_utf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
                $end = \min($length - 1, $string_length);
            }

            $pos = $next_boundary($end);
            if (!$pos) {
                return $str;
            }

            $str_sub = $slice(0, $pos);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // ---------------------------------------------------------------
        // with search string: locate the match and the start of the window
        // ---------------------------------------------------------------

        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $search_length = (int) \mb_strlen($search);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $search_length = (int) self::strlen($search, $encoding);
        }
        $half_side = (int) ($word_position - $length / 2 + $search_length / 2);

        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $slice(0, $half_side);

            if ($half_text !== false) {
                // last ' ' / '.' in front of the window
                if ($is_utf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        // ---------------------------------------------------------------
        // no match, or the match is close to the start: cut from the beginning
        // ---------------------------------------------------------------

        if (!$word_position || $half_side <= 0) {
            $offset = $length - 1;
            $true_length = (int) self::strlen($str, $encoding);
            if ($offset > $true_length) {
                $offset = $true_length;
            }

            $pos_end = $next_boundary($offset);
            if (!$pos_end) {
                return $str;
            }

            $str_sub = $slice(0, $pos_end);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // ---------------------------------------------------------------
        // match inside the text: cut a window starting at $pos_start
        // ---------------------------------------------------------------

        $offset = $pos_start + $length - 1;
        $real_length = (int) self::strlen($str, $encoding);
        if ($offset > $real_length) {
            $offset = $real_length;
        }

        $pos_end = $next_boundary($offset) - $pos_start;

        if ($pos_end <= 0) {
            // no usable end boundary: keep everything from $pos_start up to the end of the text
            $rest_length = $is_utf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
            $str_sub = $slice($pos_start, $rest_length);
            if ($str_sub === false) {
                return '';
            }

            return $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
        }

        $str_sub = $slice($pos_start, $pos_end);
        if ($str_sub === false) {
            return '';
        }

        return $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }
1740
1741
    /**
     * Reads entire file into a string.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read; it is passed through
     *                                        Bootup::filter_sanitize_string_polyfill() first.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded to the native "file_get_contents()" to trigger
     *                                        the include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is set,
     *                                        a context with that "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read, negative values are handled as 0.
     *                                        The default is to read until end of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
        $filename = Bootup::filter_sanitize_string_polyfill($filename);
        if ($filename === false) {
            return false;
        }

        // Only build a default context if the caller did not provide one.
        if ($timeout && $context === null) {
            $http_options = ['timeout' => $timeout];
            $context = \stream_context_create(['http' => $http_options]);
        }

        $offset = $offset ?? 0;

        if ($max_length === null) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        } else {
            // negative lengths are clamped to 0
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, \max(0, $max_length));
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // Convert everything that is not detected as binary, plus UTF-16 / UTF-32 data.
        $is_convertible = !self::is_binary($data, true)
                          ||
                          self::is_utf16($data, false) !== false
                          ||
                          self::is_utf32($data, false) !== false;

        if ($is_convertible) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1846
1847
    /**
     * Checks if a file starts with BOM (Byte Order Mark) character.
     *
     * The file is read completely and its content is checked via "string_has_bom()".
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1870
1871
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively, strings are normalized and every other type is
     * returned untouched. Strings that can not be normalized are passed through "encode('UTF-8', ...)".
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var
     * @param int                 $normalization_form <p>One of the "\Normalizer" form constants.</p>
     * @param string              $leading_combining  <p>Prefix for strings which start with a combining mark.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            // drop the dangling reference of the loop
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // pure ASCII needs no normalization at all
            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // non-empty marker: "already normalized"
                    $normalized = '-';
                } else {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    $var = ($normalized && isset($normalized[0]))
                        ? $normalized
                        : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $starts_with_combining_mark = $normalized
                                              &&
                                              $var[0] >= "\x80"
                                              &&
                                              isset($normalized[0], $leading_combining[0])
                                              &&
                                              \preg_match('/^\\p{Mn}/u', $var);

                if ($starts_with_combining_mark) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1948
1949
    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it and passes the result to "filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      null is not forwarded to the native function.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_options = $options !== null && \func_num_args() >= 4;

        $value = $has_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($value);
    }
2004
2005
    /**
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables, optionally filters them and passes the result to "filter()".
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => 'FILTER_UNSAFE_RAW')); // array('bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int        $type       <p>
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                               <b>INPUT_ENV</b>.
     *                               </p>
     * @param array|null $definition [optional] <p>
     *                               An array defining the arguments. A valid key is a string
     *                               containing a variable name and a valid value is either a filter type, or an array
     *                               optionally specifying the filter, flags and options. If the value is an
     *                               array, valid keys are filter which specifies the filter type,
     *                               flags which specifies any flags that apply to the
     *                               filter, and options which specifies any options that
     *                               apply to the filter.
     *                               </p>
     *                               <p>
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
     *                               input array are filtered by this filter.
     *                               </p>
     *                               <p>
     *                               With null the native function is called without a definition
     *                               (and without $add_empty).
     *                               </p>
     * @param bool       $add_empty  [optional] <p>
     *                               Add missing keys as <b>NULL</b> to the return value.
     *                               </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_definition = $definition !== null && \func_num_args() >= 2;

        $values = $has_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
2066
2067
    /**
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter and passes the result to "filter()".
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        null (also if passed explicitly) means "no options".
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                            'options' => array(
     *                                                'default' => 3, // value to return if the filter fails
     *                                                // other options here
     *                                                'min_range' => 0
     *                                            ),
     *                                            'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                            array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                            // Expected format: Surname, GivenNames
     *                                            if (strpos($value, ", ") === false) return false;
     *                                            list($surname, $givennames) = explode(", ", $value, 2);
     *                                            $empty = (empty($surname) || empty($givennames));
     *                                            $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                            if ($empty || $notstrings) {
     *                                                return false;
     *                                            } else {
     *                                                return $value;
     *                                            }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // The native "filter_var()" only accepts array|int options, so null must never be forwarded
        // (same handling as in "filter_input()"). An omitted argument is null as well, so this single
        // check covers both the "not passed" and the "passed as null" case.
        if ($options === null) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2146
2147
    /**
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables, optionally filters them and passes the result to "filter()".
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'user@example.com'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'user@example.com']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<mixed>   $data       <p>
     *                                   An array with string keys containing the data to filter.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a
     *                                   filter type, or an
     *                                   array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are filter
     *                                   which specifies the filter type,
     *                                   flags which specifies any flags that apply to the
     *                                   filter, and options which specifies any options that
     *                                   apply to the filter.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all values
     *                                   in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   null (also if passed explicitly) means "no definition": the native
     *                                   function is then called with its own defaults.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   Only forwarded together with a non-null $definition.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set.
     *               </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // The native "filter_var_array()" only accepts an array|int definition; a forwarded null makes
        // the call fail instead of applying the default filter. Handle null like an omitted argument
        // (same handling as in "filter_input_array()").
        if ($definition === null) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        return self::filter($a);
    }
2217
2218
    /**
     * Tells whether the "finfo" class exists in the current PHP runtime.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the class is available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function finfo_loaded(): bool
    {
        $is_available = \class_exists('finfo');

        return $is_available;
    }
2232
2233
    /**
     * Get the leading $n characters of a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the start; values <= 0 give an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The first $n characters, or an empty string for empty input / non-positive $n.</p>
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to extract from an empty string or for a non-positive count
        if ($n <= 0 || $str === '') {
            return '';
        }

        // every other charset goes through the encoding-aware helper
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, 0, $n, $encoding);
        }

        // UTF-8: call mbstring directly (no explicit encoding argument is passed)
        return (string) \mb_substr($str, 0, $n);
    }
2259
2260
    /**
     * Check that a string is not longer (in characters) than the given box size.
     *
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // true (5 characters)</code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the maximum number of chars the string may have
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if the length reported by UTF8::strlen() is less than or equal
     *              to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        // a non-integer result from strlen() is cast to int before comparing
        $length = (int) self::strlen($str);

        return $length <= $box_size;
    }
2277
2278
    /**
     * Repair simple cases of broken UTF-8 by replacing known broken sequences.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement table is loaded once via self::getData('utf8_fix') and then
     * kept in static variables for later calls.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $SEARCH_CACHE = null;

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $REPLACE_CACHE = null;

        // first call: build the search / replace lists from the shared lookup table
        if ($SEARCH_CACHE === null) {
            self::$BROKEN_UTF8_FIX = self::$BROKEN_UTF8_FIX ?? self::getData('utf8_fix');

            $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
            $SEARCH_CACHE = \array_keys(self::$BROKEN_UTF8_FIX ?: []);
        }

        \assert(\is_array($REPLACE_CACHE));

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
2328
2329
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * The string is decoded and re-encoded (UTF8::utf8_decode() followed by
     * UTF8::to_utf8()) until a pass no longer changes it. Arrays are processed
     * element by element (recursively), keeping their keys.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>Will return the fixed input-"array" or
     *                         the fixed input-"string".</p>
     *
     * @template TFixUtf8
     * @phpstan-param TFixUtf8 $str
     * @phpstan-return TFixUtf8
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return \array_map(
                static function ($item) {
                    return self::fix_utf8($item);
                },
                $str
            );
        }

        $str = (string) $str;

        // repeat until one full decode / encode round leaves the string untouched
        $previous = '';
        while ($str !== $previous) {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::to_utf8(self::utf8_decode($str, true));
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
2377
2378
    /**
     * Get the text direction of a specific character.
     *
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * If "IntlChar" is flagged as supported, its direction class is used first;
     * when that gives no LTR / RTL answer (or IntlChar is unavailable) the code
     * point is looked up in a built-in table of right-to-left code points.
     *
     * @param string $char
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            $intl_direction = \IntlChar::charDirection($char);

            // direction classes from the "IntlChar"-Class
            $ltr_classes = [0, 11, 12, 20];
            $rtl_classes = [1, 13, 14, 15, 21];

            if (\in_array($intl_direction, $ltr_classes, true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, $rtl_classes, true)) {
                return 'RTL';
            }
        }

        $c = static::chr_to_decimal($char);

        // everything outside of this window is treated as left-to-right
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // single right-to-left code points
        $rtl_code_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtl_code_points, true)) {
            return 'RTL';
        }

        // inclusive right-to-left ranges: [first, last]
        $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            // open-ended in the original check, capped by the window test above
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2495
2496
    /**
     * Read the php-support information collected by this class.
     *
     * When a key is requested, the transliterator list is loaded on first use
     * (via self::getData('transliterator_list')) and stored in the support array
     * under 'intl__transliterator_list_ids' before the lookup.
     *
     * @param string|null $key
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return the stored value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        return self::$SUPPORT;
    }
2522
2523
    /**
     * Detect the file type from the first two bytes of the given data.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The (binary) content to inspect.</p>
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'; returned as-is
     *                         when the type cannot be determined</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // at least two bytes are needed for the signature check
        if (!isset($str[1])) {
            return $fallback;
        }

        // decimal values of the first two bytes glued together, e.g. 0xFF 0xD8 => 255216
        $type_code = (int) (\ord($str[0]) . \ord($str[1]));

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_signatures = [
            255216 => ['ext' => 'jpg', 'mime' => 'image/jpeg', 'type' => 'binary'],
            13780  => ['ext' => 'png', 'mime' => 'image/png', 'type' => 'binary'],
        ];

        return $known_signatures[$type_code] ?? $fallback;
    }
2599
2600
    /**
     * Build a random string by repeatedly picking one character from $possible_chars.
     *
     * Indexes are drawn with random_int(); mt_rand() is only used if random_int()
     * throws. An empty character pool yields an empty string.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // decided before the encoding gets normalized: only a literal 'UTF-8' uses mbstring directly
        $use_mb_directly = $encoding === 'UTF-8';

        if ($use_mb_directly) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        if ($pool_size === 0) {
            return '';
        }

        //
        // add random chars
        //

        $random_string = '';
        for ($picked = 0; $picked < $length;) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                $index = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_mb_directly
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // a failed extraction does not count, another character is drawn instead
            if ($char !== false) {
                $random_string .= $char;
                ++$picked;
            }
        }

        return $random_string;
    }
2662
2663
    /**
     * Create a unique identifier string.
     *
     * A random number, the session id, the remote / server address (when set in
     * $_SERVER) and the extra entropy are combined and used as prefix for uniqid().
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_part = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            $random_part = \mt_rand(0, \mt_getrandmax());
        }

        $session_part = \session_id();
        $remote_addr = $_SERVER['REMOTE_ADDR'] ?? '';
        $server_addr = $_SERVER['SERVER_ADDR'] ?? '';

        $seed = $random_part . $session_part . $remote_addr . $server_addr . $extra_entropy;

        $unique = \uniqid($seed, true);

        if (!$use_md5) {
            return $unique;
        }

        return \md5($unique . $seed);
    }
2691
2692
    /**
     * Check whether the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without mbstring the pattern is handed to the class' own matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2710
2711
    /**
     * Check whether the string contains at least one whitespace character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        $pattern = '.*[[:space:]]';

        // without mbstring the pattern is handed to the class' own matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2729
2730
    /**
     * Check whether the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without mbstring the pattern is handed to the class' own matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2748
2749
    /**
     * Turn a hexadecimal code point notation into the matching UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // hexdec() complains about characters it has to skip (e.g. the "U+" prefix), hence the "@"
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2767
2768
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted notations are "U+xxxx", "\uxxxx" and the bare hex digits, each
     * with 4 to 6 hexadecimal digits.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure.</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hex digits are accepted: with a wider class such as [a-zA-Z0-9]
        // an input like "zzzz" would match and intval() would silently turn it
        // into 0 instead of reporting a failure.
        if (\preg_match('/^(?:\\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2797
2798
    /**
     * Encode the characters of a UTF-8 string as HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars (only code points from 0x80 on are encoded).</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // conversion map: [first code point, last code point, offset, mask]
            $conv_map = [$keep_ascii_chars ? 0x80 : 0x00, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
                $encoded = \mb_encode_numericentity($str, $conv_map);
                if ($encoded !== null && $encoded !== false) {
                    return $encoded;
                }
            }

            /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
            $encoded = \mb_encode_numericentity($str, $conv_map, $encoding);
            if ($encoded !== null && $encoded !== false) {
                return $encoded;
            }
        }

        //
        // fallback via vanilla php
        //

        $encoded_chars = [];
        foreach (self::str_split($str) as $chr) {
            $encoded_chars[] = self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
        }

        return \implode('', $encoded_chars);
    }
2870
2871
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so numeric entities without a trailing semicolon get one
     * appended here before decoding.
     *
     * Decoding is repeated until the string no longer changes, so nested
     * encodings such as "&amp;lt;" are resolved completely.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string   $str      <p>The input string.</p>
     * @param int|null $flags    [optional] <p>
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
     *                           and which document type to use. If null, ENT_QUOTES | ENT_HTML5 is used.
     *                           <ul>
     *                           <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
     *                           <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                           <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
     *                           <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                           <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                           <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                           <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                           </ul>
     *                           </p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // too short to hold an entity (examples: &; || &x;) or no "&" at all
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        $is_natively_handled = $encoding === 'UTF-8'
                               || $encoding === 'ISO-8859-1'
                               || $encoding === 'WINDOWS-1252';

        if (!$is_natively_handled && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        do {
            $previous = $str;

            // nothing left to decode
            if (\strpos($str, '&') === false) {
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities
                // (append the missing ";" so that html_entity_decode() picks them up)
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($previous !== $str);

        return $str;
    }
3003
3004
    /**
     * Escape a string for html output via "UTF8::htmlspecialchars()",
     * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3022
3023
    /**
     * Remove empty html-tags, i.e. an opening tag that is directly followed by a closing tag
     * (only whitespace is allowed in between).
     *
     * e.g.: "<tag></tag>" or "<tag>  </tag>"
     *
     * INFO: The name of the closing tag is not compared with the opening tag, and the
     *       replacement is a single pass: a tag that only becomes empty because its
     *       empty child was removed is kept.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without empty tag pairs.</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // opening tag (no "/" inside) + optional whitespace + any closing tag
        $empty_tag_pair = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($empty_tag_pair, '', $str);

        // preg_replace() yields null on failure, the cast turns that into ''
        return (string) $stripped;
    }
3042
3043
    /**
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
     *
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
     *
     * Processing order: native htmlentities(), then every backslash is replaced by "&#92;",
     * and finally the result is passed through "UTF8::html_encode()".
     *
     * @see http://php.net/manual/en/function.htmlentities.php
     *
     * @param string $str           <p>The input string.</p>
     * @param int    $flags         [optional] <p>
     *                              Bitmask that is handed to the native htmlentities() unchanged;
     *                              the default is ENT_COMPAT.
     *                              <br><br>
     *                              <b>ENT_COMPAT</b>: convert double-quotes, leave single-quotes alone.<br>
     *                              <b>ENT_QUOTES</b>: convert both double and single quotes.<br>
     *                              <b>ENT_NOQUOTES</b>: leave both double and single quotes unconverted.<br>
     *                              <b>ENT_IGNORE</b>: silently discard invalid code unit sequences instead of
     *                              returning an empty string (discouraged, may have security implications).<br>
     *                              <b>ENT_SUBSTITUTE</b>: replace invalid code unit sequences with U+FFFD (UTF-8)
     *                              or &amp;#xFFFD; (otherwise) instead of returning an empty string.<br>
     *                              <b>ENT_DISALLOWED</b>: replace code points that are invalid for the given
     *                              document type with U+FFFD (UTF-8) or &amp;#xFFFD; (otherwise).<br>
     *                              <b>ENT_HTML401</b>: handle code as HTML 4.01.<br>
     *                              <b>ENT_XML1</b>: handle code as XML 1.<br>
     *                              <b>ENT_XHTML</b>: handle code as XHTML.<br>
     *                              <b>ENT_HTML5</b>: handle code as HTML 5.
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              The encoding used for the conversion. Any value other than "UTF-8" or
     *                              "CP850" is first run through "UTF8::normalize_encoding()" with "UTF-8"
     *                              as fallback.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When turned off, existing html entities are not encoded again.
     *                              The default is to convert everything.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The encoded string.
     *                <br><br>
     *                The native htmlentities() returns an empty string for input with an invalid code
     *                unit sequence, unless <b>ENT_IGNORE</b> or <b>ENT_SUBSTITUTE</b> is set.
     *                </p>
     */
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $is_known_encoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * The native function leaves backslashes untouched, so each backslash is
         * replaced by its numeric html entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // NOTE(review): judging by the EXAMPLE above, html_encode() presumably turns the remaining
        // non-ASCII characters into numeric entities — confirm against html_encode() itself.
        return self::html_encode($encoded, true, $encoding);
    }
3181
3182
    /**
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
     *
     * INFO: Take a look at "UTF8::htmlentities()"
     *
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
     *
     * @see http://php.net/manual/en/function.htmlspecialchars.php
     *
     * @param string $str           <p>The string being converted.</p>
     * @param int    $flags         [optional] <p>
     *                              Bitmask that is handed to the native htmlspecialchars() unchanged;
     *                              the default is ENT_COMPAT.
     *                              <br><br>
     *                              <b>ENT_COMPAT</b>: convert double-quotes, leave single-quotes alone.<br>
     *                              <b>ENT_QUOTES</b>: convert both double and single quotes.<br>
     *                              <b>ENT_NOQUOTES</b>: leave both double and single quotes unconverted.<br>
     *                              <b>ENT_IGNORE</b>: silently discard invalid code unit sequences instead of
     *                              returning an empty string (discouraged, may have security implications).<br>
     *                              <b>ENT_SUBSTITUTE</b>: replace invalid code unit sequences with U+FFFD (UTF-8)
     *                              or &amp;#xFFFD; (otherwise) instead of returning an empty string.<br>
     *                              <b>ENT_DISALLOWED</b>: replace code points that are invalid for the given
     *                              document type with U+FFFD (UTF-8) or &amp;#xFFFD; (otherwise).<br>
     *                              <b>ENT_HTML401</b>: handle code as HTML 4.01.<br>
     *                              <b>ENT_XML1</b>: handle code as XML 1.<br>
     *                              <b>ENT_XHTML</b>: handle code as XHTML.<br>
     *                              <b>ENT_HTML5</b>: handle code as HTML 5.
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              The encoding used for the conversion. Any value other than "UTF-8" or
     *                              "CP850" is first run through "UTF8::normalize_encoding()" with "UTF-8"
     *                              as fallback.
     *                              </p>
     *                              <p>
     *                              For the purposes of this function, the encodings ISO-8859-1, ISO-8859-15,
     *                              UTF-8, cp866, cp1251, cp1252, and KOI8-R are effectively equivalent,
     *                              provided the <i>string</i> itself is valid for the encoding, as the
     *                              characters affected by <b>htmlspecialchars</b> occupy the same positions
     *                              in all of these encodings.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When turned off, existing html entities are not encoded again.
     *                              The default is to convert everything.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                The native htmlspecialchars() returns an empty string for input with an invalid
     *                code unit sequence, unless <b>ENT_IGNORE</b> or <b>ENT_SUBSTITUTE</b> is set.
     *                </p>
     */
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $is_known_encoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
3313
3314
    /**
     * Checks whether the "iconv" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function iconv_loaded(): bool
    {
        $is_available = \extension_loaded('iconv');

        return $is_available;
    }
3328
3329
    /**
     * Converts an integer to its hexadecimal "U+xxxx" code point representation.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text that is put in front of the hex digits.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The prefix followed by the lowercase dechex() digits, left-padded with "0" to at least four digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // values with fewer than four hex digits are zero-padded, longer ones are kept as they are
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $digits;
    }
3351
3352
    /**
     * Checks whether the "IntlChar" class is available.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function intlChar_loaded(): bool
    {
        $is_available = \class_exists('IntlChar');

        return $is_available;
    }
3366
3367
    /**
     * Checks whether the "intl" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function intl_loaded(): bool
    {
        $is_available = \extension_loaded('intl');

        return $is_available;
    }
3381
3382
    /**
     * Returns true if the string contains only alphabetic chars, false otherwise.
     *
     * INFO: An empty string matches the pattern as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars.</p>
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // use mbstring when it is flagged as available, otherwise the internal pattern matcher
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3400
3401
    /**
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
     *
     * INFO: An empty string matches the pattern as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // use mbstring when it is flagged as available, otherwise the internal pattern matcher
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3419
3420
    /**
     * Returns true if the string contains only punctuation chars, false otherwise.
     *
     * INFO: The check is always delegated to "UTF8::str_matches_pattern()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only punctuation chars.</p>
     */
    public static function is_punctuation(string $str): bool
    {
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3434
3435
    /**
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
     *
     * The string counts as printable when "UTF8::remove_invisible_characters()" returns it unchanged.
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
     */
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        $without_invisible_chars = self::remove_invisible_characters(
            $str,
            false,
            '',
            $ignore_control_characters
        );

        return $without_invisible_chars === $str;
    }
3450
3451
    /**
     * Checks if a string is 7 bit ASCII (delegates to "ASCII::is_ascii()").
     *
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        $is_seven_bit = ASCII::is_ascii($str);

        return $is_seven_bit;
    }
3470
3471
    /**
     * Returns true if the string is base64 encoded, false otherwise.
     *
     * The input has to survive a strict decode / encode round trip unchanged.
     *
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
     *
     * @param string|null $str                   <p>The input string, non-string values are never valid.</p>
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is base64 encoded.</p>
     */
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        if (!\is_string($str)) {
            return false;
        }

        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // strict mode: characters outside of the base64 alphabet make the decoding fail
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        return \base64_encode($decoded) === $str;
    }
3502
3503
    /**
     * Check if the input looks like binary data (heuristic).
     *
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
     *
     * The checks are applied in this order:
     * 1. an empty input is never binary
     * 2. an input that consists only of "0" and "1" is binary
     * 3. an input for which "UTF8::get_file_type()" reports the type "binary" is binary
     * 4. non-strict mode: binary if more than 25% of the bytes are NUL bytes
     * 5. strict mode: binary if "finfo" reports the mime-encoding "binary"
     *
     * @param int|string $input  <p>The input, it is cast to a string first.</p>
     * @param bool       $strict [optional] <p>Use "finfo" (step 5) instead of the NUL byte ratio (step 4).</p>
     *
     * @throws \RuntimeException if strict mode is requested but "ext-fileinfo" is flagged as unavailable
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $bytes = (string) $input;
        if ($bytes === '') {
            return false;
        }

        if (\preg_match('~^[01]+$~', $bytes)) {
            return true;
        }

        $file_type = self::get_file_type($bytes);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        if (!$strict) {
            // $bytes is not empty at this point, so the division is safe
            $null_byte_ratio = \substr_count($bytes, "\x0") / \strlen($bytes);

            return $null_byte_ratio > 0.25;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $mime_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($bytes);

        return $mime_encoding === 'binary';
    }
3555
3556
    /**
     * Check if the file is binary.
     *
     * Only the first 512 bytes of the file are read and handed to "UTF8::is_binary()" in strict mode.
     *
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
     *
     * @param string $file <p>Path of the file, it is opened with fopen() in "rb" mode.</p>
     *
     * @return bool
     *              <p><strong>false</strong> is also returned if the file cannot be opened or is empty.</p>
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $first_block = \fread($handle, 512);
        \fclose($handle);

        if ($first_block === false || $first_block === '') {
            return false;
        }

        return self::is_binary($first_block, true);
    }
3582
3583
    /**
     * Returns true if the string contains only whitespace chars, false otherwise.
     *
     * INFO: An empty string matches the pattern as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // use mbstring when it is flagged as available, otherwise the internal pattern matcher
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3601
3602
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
     *
     * INFO: The input is compared strictly (===) with the keys of self::$BOM, so a
     *       non-string input or a string that merely starts with a BOM is never a match.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the $str is Byte Order Mark, <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // Read-only lookup: the keys are copied by value, so the shared static array is
        // neither iterated by reference nor left with a dangling reference.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3627
3628
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it equals FALSE when converted to a boolean,
     * e.g.: '', '0', 0, 0.0, [] or null.
     *
     * @param array|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is empty().</p>
     */
    public static function is_empty($str): bool
    {
        // the parameter is always defined here, so empty() boils down to a boolean negation
        return !$str;
    }
3645
3646
    /**
     * Returns true if the string contains only hexadecimal chars, false otherwise.
     *
     * INFO: An empty string matches the pattern as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // use mbstring when it is flagged as available, otherwise the internal pattern matcher
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3664
3665
    /**
     * Check if the string contains any HTML tags.
     *
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $emoji_safe_str = self::emoji_encode($str);

        // an opening or closing tag, optionally with attributes and / or self-closing
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        $found_tags = [];
        \preg_match($tag_pattern, $emoji_safe_str, $found_tags);

        return $found_tags !== [];
    }
3692
3693
    /**
     * Check if $url is a correct url.
     *
     * Only urls that start with "http://" or "https://" are accepted. After that the url is
     * valid if it either starts with "scheme://label.label" (letters of any script are allowed,
     * which is needed for internationalized domain names) or if it passes FILTER_VALIDATE_URL.
     *
     * @param string $url                <p>The url to check.</p>
     * @param bool   $disallow_localhost [optional] <p>
     *                                   Reject urls that start with a localhost address ("localhost",
     *                                   "127.0.0.1", "::1" or "[::1]") and urls that contain ".localhost"
     *                                   anywhere after the scheme.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // an IPv6 host has to be wrapped in brackets inside of an url, so this
                    // is the form of the loopback address that actually passes the validation
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            // the ".localhost" check is not limited to the host part of the url
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        //       (the pattern is only anchored at the start of the url, the rest of the url is not checked)
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        if (\preg_match($regex, $url)) {
            return true;
        }

        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3744
3745
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
     *                                                       results.</p>
     *
     * @throws \RuntimeException if "ext-json" is flagged as unavailable
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a null result is only acceptable if the input itself spells "null" (in any letter case)
        $input_spells_null = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$input_spells_null) {
            return false;
        }

        if ($only_array_or_object_results_are_valid) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        return \json_last_error() === \JSON_ERROR_NONE;
    }
3784
3785
    /**
     * Returns true if the string contains only lower case chars, false otherwise.
     *
     * The check uses the POSIX class pattern "^[[:lower:]]*$", evaluated by mbstring when the
     * support table says it is available and by self::str_matches_pattern() otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $lowercase_only_pattern = '^[[:lower:]]*$';

        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($lowercase_only_pattern, $str)
            : self::str_matches_pattern($str, $lowercase_only_pattern);
    }
3801
3802
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check works by unserializing the input. Class instantiation is disabled for that
     * call ("allowed_classes" => false), so serialized objects are restored as
     * __PHP_Incomplete_Class and probing untrusted input cannot create instances of real classes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is a serialized boolean false: unserialize() would return false for it,
        // which cannot be told apart from a failure, so it is handled explicitly.
        if ($str === 'b:0;') {
            return true;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3824
3825
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * The check uses the POSIX class pattern "^[[:upper:]]*$", evaluated by mbstring when the
     * support table says it is available and by self::str_matches_pattern() otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $uppercase_only_pattern = '^[[:upper:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $uppercase_only_pattern);
        }

        return \mb_ereg_match($uppercase_only_pattern, $str);
    }
3844
3845
    /**
     * Check if the string is UTF-16.
     *
     * For each byte order (LE first, then BE) the input is converted to UTF-8, back to UTF-16 and
     * to UTF-8 again. When that round trip is stable, every key of self::count_chars() for the
     * converted text is looked up (strict in_array()) in the char statistics of the raw input and
     * the hits are counted. The byte order with more hits wins; equal counts mean "not UTF-16".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>When true, input that self::is_binary() does not
     *                                          report as binary is rejected up front (skipped if the input has a BOM).</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        // fix for the "binary"-check: it is skipped when the string carries a BOM
        if (self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // char statistics of the raw input, computed lazily and shared by both byte orders
        $str_chars = null;

        // number of hits per byte order; the key order fixes the evaluation order (LE, then BE)
        $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (\array_keys($hits) as $utf16_encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $utf16_encoding);
            if (!$test) {
                continue;
            }

            // only a stable round trip counts as a candidate for this byte order
            $test2 = \mb_convert_encoding($test, $utf16_encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $utf16_encoding);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === null) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys are needed, so iterate them by value (no reference into a temporary)
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$hits[$utf16_encoding];
                }
            }
        }

        if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
            return false;
        }

        return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
    }
3943
3944
    /**
     * Check if the string is UTF-32.
     *
     * For each byte order (LE first, then BE) the input is converted to UTF-8, back to UTF-32 and
     * to UTF-8 again. When that round trip is stable, every key of self::count_chars() for the
     * converted text is looked up (strict in_array()) in the char statistics of the raw input and
     * the hits are counted. The byte order with more hits wins; equal counts mean "not UTF-32".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>When true, input that self::is_binary() does not
     *                                          report as binary is rejected up front.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // char statistics of the raw input, computed lazily and shared by both byte orders
        $str_chars = null;

        // number of hits per byte order; the key order fixes the evaluation order (LE, then BE)
        $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (\array_keys($hits) as $utf32_encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $utf32_encoding);
            if (!$test) {
                continue;
            }

            // only a stable round trip counts as a candidate for this byte order
            $test2 = \mb_convert_encoding($test, $utf32_encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $utf32_encoding);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === null) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys are needed, so iterate them by value (no reference into a temporary)
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$hits[$utf32_encoding];
                }
            }
        }

        if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
            return false;
        }

        return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
    }
4037
4038
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * Arrays are checked element by element (recursively); the first failing element ends the
     * check. Any other input is cast to string and handed to self::is_utf8_string().
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // the elements are only read, so iterate by value
            foreach ($str as $item) {
                if (!self::is_utf8($item, $strict)) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4068
4069
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Decodes a JSON string
     *
     * The input is passed through self::filter() first; a depth below 1 is raised to 1 before
     * the native decoder is called.
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options. Currently only
     *                        <b>JSON_BIGINT_AS_STRING</b>
     *                        is supported (default is to cast large integers as floats)
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the support table reports that ext-json is not available
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // never hand a depth below 1 to the native decoder
        $effective_depth = \max(1, $depth);

        return \json_decode($filtered_json, $assoc, $effective_depth, $options);
    }
4126
4127
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Returns the JSON representation of a value.
     *
     * The value is passed through self::filter() first; a depth below 1 is raised to 1 before
     * the native encoder is called.
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the support table reports that ext-json is not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // never hand a depth below 1 to the native encoder
        $effective_depth = \max(1, $depth);

        return \json_encode($filtered_value, $options, $effective_depth);
    }
4183
4184
    /**
     * Checks whether JSON is available on the server.
     *
     * Availability is detected by the existence of the json_decode() function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $probe_function = 'json_decode';

        return \function_exists($probe_function);
    }
4198
4199
    /**
     * Makes string's first char lowercase.
     *
     * For 'UTF-8' without a language and without length-keeping, plain mb_strtolower() is used
     * on the first char; every other combination goes through self::strtolower(). Non 'UTF-8'
     * encodings are normalized first and sliced with self::substr().
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $remainder = (string) \mb_substr($str, 1);
            $first_char = (string) \mb_substr($str, 0, 1);

            // the native function is enough when no language rules or length-keeping are requested
            $native_lowercase_is_enough = ($lang === null && !$try_to_keep_the_string_length);
            if ($native_lowercase_is_enough) {
                return \mb_strtolower($first_char) . $remainder;
            }

            return self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $remainder;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $remainder = (string) self::substr($str, 1, null, $encoding);
        $first_char = (string) self::substr($str, 0, 1, $encoding);

        return self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $remainder;
    }
4261
4262
    /**
     * Lowercase for all words in the string.
     *
     * The string is split with self::str_to_words(); every non-empty part that is not listed in
     * $exceptions gets its first char lowercased via self::lcfirst(), and all parts are joined
     * again in their original order.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // compare against '' explicitly: a loose check would also discard the string "0"
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty parts only; a "0" part must survive
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
4315
4316
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * With mbstring available the replacement is done by mb_ereg_replace(), otherwise by
     * self::regex_replace(). $chars is regex-quoted and used as a character class.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" form: the "${var}" interpolation is deprecated as of PHP 8.2
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            // "{$var}" form: the "${var}" interpolation is deprecated as of PHP 8.2
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }
4355
4356
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string before its code points are collected.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);

        // no code points means there is nothing to compare
        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4382
4383
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte lengths of the given chars, 0 when the size list is empty.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $char_sizes = self::chr_size_list($str);

        return $char_sizes === [] ? 0 : (int) \max($char_sizes);
    }
4405
4406
    /**
     * Checks whether mbstring is available on the server.
     *
     * Availability is detected by asking PHP whether the "mbstring" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $extension_name = 'mbstring';

        return \extension_loaded($extension_name);
    }
4420
4421
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string before its code points are collected.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
     */
    public static function min($arg)
    {
        $input = $arg;
        if (\is_array($input)) {
            $input = \implode('', $input);
        }

        $codepoints = self::codepoints($input);
        if ($codepoints !== []) {
            $lowest_codepoint = (int) \min($codepoints);

            return self::chr($lowest_codepoint);
        }

        // no code points means there is nothing to compare
        return null;
    }
4448
4449
    /**
     * Normalize the name of an encoding (charset).
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * The lookup order is: hard-coded fast paths for the most common names, the per-request
     * result cache, the list of known encodings, and finally a table of aliases that is
     * matched against the upper-cased input with every non-alphanumeric character removed.
     * Unknown names are returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)</p>
     *
     * @template TNormalizeEncodingFallback
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
     * @phpstan-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // An empty name (and also '0', which is falsy in PHP) yields the fallback.
        if (!$encoding) {
            return $fallback;
        }

        // Fast paths: exact, case-sensitive spellings that need no further processing.
        $fast_paths = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($fast_paths[$encoding])) {
            return $fast_paths[$encoding];
        }

        // only a fallback, for non "strict_types" usage (e.g. a boolean / integer was passed) ...
        if ($encoding === '1' || $encoding === '0') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // Lazy-load the list of known encoding names.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // A name that is already in the list is returned untouched.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $normalized = \strtoupper($encoding);
        // Alias lookup key: upper-cased name without dashes, underscores, spaces, ...
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $normalized);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Every alias maps to a non-empty name, so a plain isset() is sufficient here.
        if (isset($equivalences[$alias_key])) {
            $normalized = $equivalences[$alias_key];
        }

        // Cache under the name exactly as it was passed in (before upper-casing).
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4614
4615
    /**
     * Standardize line endings ("\r\n", "\r" and "\n") to a single style, unix-like by default.
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. If an array is given, it is handed to "str_replace()" as
     *                                  positional replacements for ["\r\n", "\r", "\n"] (legacy behaviour).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if (\is_array($replacer)) {
            // Legacy behaviour for array replacers is kept as-is: sequential, positional replacement.
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // "strtr()" scans the input only once (longest key first) and never re-scans the text it
        // has just inserted. A sequential "str_replace()" would corrupt the result as soon as the
        // replacer itself contains "\r" or "\n" (e.g. "\r\n" would end up as "\r\r\n\r\n").
        return \strtr(
            $str,
            [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ]
        );
    }
4631
4632
    /**
     * Normalize some MS Word special characters.
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * This is a thin wrapper: the actual work is delegated to "ASCII::normalize_msword()".
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4648
4649
    /**
     * Normalize the whitespace.
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * This is a thin wrapper: all arguments are forwarded unchanged to "ASCII::normalize_whitespace()".
     *
     * @param string $str                          <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
     *                                             bidirectional text chars.</p>
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }
4678
4679
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * Resolution order: per-request cache, the pre-computed "ord" lookup table,
     * "IntlChar::ord()" (if available) and finally a manual decode of the first (up to) four bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 if no byte could be decoded (e.g. for an empty string in the pure-PHP fallback)</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        // Only unknown spellings need the (more expensive) normalization.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The cache key is built from the *input* character, before any re-encoding below.
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // Lazy-load the pre-computed "character => code point" table.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $code = self::$ORD[$chr];
            $CHAR_CACHE[$cache_key] = $code;

            return $code;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $intl_code = \IntlChar::ord($chr);
            // null (failure) and 0 both fall through to the manual decoding below
            if ($intl_code) {
                $CHAR_CACHE[$cache_key] = $intl_code;

                return $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        /** @var int[] $bytes - "unpack": only false if the format string contains errors */
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        // "unpack()" returns a 1-based array; the first entry is the lead byte.
        $lead_byte = $bytes ? $bytes[1] : 0;

        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // 4-byte sequence
            $code = (($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // 3-byte sequence
            $code = (($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // 2-byte sequence
            $code = (($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // single byte (or a truncated multi-byte sequence): use the byte value itself
            $code = $lead_byte;
        }

        $CHAR_CACHE[$cache_key] = $code;

        return $code;
    }
4764
4765
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            // Without "mbstring" an empty result is the only failure indicator we have.
            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        // Success requires both: no error from "mb_parse_str()" and at least one parsed entry.
        return !($parsed === false || $result === []);
    }
4806
4807
    /**
     * Checks if the "u" pattern modifier is available, which enables Unicode (UTF-8) support in PCRE.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        // Probe with an empty pattern: it matches the empty subject (1) if the modifier is
        // accepted; otherwise "preg_match()" fails (false) and the warning is silenced.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe = @\preg_match('//u', '');

        return $probe !== false && $probe !== 0;
    }
4823
4824
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * How the boundaries are interpreted:
     * - with $use_ctype: both decimal => code points; otherwise both hexadecimal => hex code points
     * - without $use_ctype: a numeric boundary => code point
     * - everything else => the code point of the (first) character of the boundary
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @throws \InvalidArgumentException <p>If $step is not numeric or not a positive number.</p>
     * @throws \RuntimeException         <p>If $use_ctype is requested but "ext-ctype" is not available.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The characters of the range; an empty array if a boundary is empty / zero.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // The "ctype_*" functions must always receive strings: an integer argument is interpreted
        // as an ASCII code (and is deprecated since PHP 8.1), which would misclassify e.g. int 10.
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        $is_digit = false;
        $is_xdigit = false;

        if ($use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string)) {
            $is_digit = true;
            $start = (int) $var1;
        } elseif ($use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        // The end boundary is interpreted in the same mode that was detected for the start.
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4917
4918
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass is always done; with $multi_decode the pass is repeated
        // until the string no longer changes.
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($previous),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $previous !== $str);

        return self::fix_simple_utf8($str);
    }
4978
4979
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in the delimiter and is always run with the "u" (UTF-8) modifier.
     * NOTE: The delimiter is not escaped inside of $pattern.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used. The value 'msr' is treated as 'ms'.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // 'msr' is accepted as an alias for 'ms'.
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty (falsy) delimiter
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5014
5015
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * Every known BOM is checked in turn against the (possibly already shortened) string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // byte-wise prefix comparison
            if (\strncmp($str, $bom_string, $bom_byte_length) !== 0) {
                continue;
            }

            /** @var false|string $without_bom - needed for PhpStan (stubs error) */
            $without_bom = \substr($str, $bom_byte_length);
            if ($without_bom === false) {
                return '';
            }

            $str = (string) $without_bom;
        }

        return $str;
    }
5050
5051
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Consecutive repetitions of each needle are collapsed into a single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_array($what)) {
            foreach ($what as $item) {
                // The replacement is the back-reference '$1' and not the raw needle: group 1 always
                // holds exactly one (literal) occurrence of the needle, while a raw needle that
                // contains e.g. '$0' or '\0' would itself be interpreted as a back-reference.
                $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
            }
        }

        return $str;
    }
5081
5082
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
     *                               should not be stripped. Default: '' (strip all tags)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5099
5100
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced by a single $replacement.
     * Only real "<br>" tags are removed (e.g. "<br>", "<br/>", "<br />", "<BR class='x'>"),
     * other tags that merely start with "br" are kept.
     *
     * EXAMPLE: <code>UTF8::remove_html_breaks("foo<br />\r\nbar", ' '); // 'foo  bar'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // - "\r\n" comes first in the alternation, so that it is matched as one unit
        // - "(?=[\s/>])" makes sure the tag name is exactly "br"
        // - "[^>]*" consumes optional attributes / the self-closing slash up to the closing ">"
        return (string) \preg_replace('#\r\n|\r|\n|<br(?=[\s/>])[^>]*>#i', $replacement, $str);
    }
5115
5116
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * This is a thin wrapper: all arguments are forwarded unchanged to
     * "ASCII::remove_invisible_characters()".
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        $visible_only = ASCII::remove_invisible_characters(
            $str,
            $url_encoded,
            $replacement,
            $keep_basic_control_characters
        );

        return $visible_only;
    }
5153
5154
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_left('öäü-test', 'öäü'); // '-test'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a plain truthiness check would treat the
        // prefix '0' as "no prefix given" and silently skip the removal.
        if (
            $substring === ''
            ||
            \strpos($str, $substring) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5195
5196
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_right('test-öäü', 'öäü'); // 'test-'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a plain truthiness check would treat the
        // suffix '0' as "no suffix given" and silently skip the removal.
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5234
5235
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-sensitive: plain byte-wise "str_replace()"; otherwise the UTF-8 aware variant.
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5260
5261
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            // case-insensitive replacement via the UTF-8 aware helper
            return self::str_ireplace($search, $replacement, $str);
        }

        $replaced = \str_replace($search, $replacement, $str);

        return $replaced;
    }
5286
5287
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character (may be empty, multi-byte or longer than one character).</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // Invalid bytes are either dropped ('none') or turned into U+FFFD first, so that the
            // final "str_replace()" below inserts the real replacement. Passing the byte value of
            // the replacement here (via "\ord()") would only be correct for single ASCII chars.
            $substitute_character = $replacement_char === '' ? 'none' : 0xFFFD;

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();

            try {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
                @\mb_substitute_character($substitute_character);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the global "mbstring" setting, even if the conversion throws
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD (�)
        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
5346
5347
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped; <strong>null</strong> strips whitespace,
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list would build the invalid character class "[]+$";
        // there is nothing to strip, so hand the input back untouched.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "[{$chars}]+$";
            } else {
                $pattern = '[\\s]+$';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            // the pattern is handed to regex_replace(), hence "/" is quoted as well
            $chars = \preg_quote($chars, '/');
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
5387
5388
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the internal
     * support table; the block is echoed when $useEcho is true and is returned in
     * every case.
     *
     * @param bool $useEcho <p>Set to <strong>false</strong> to only return the output without printing it.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The generated HTML.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        // init
        $html = '<pre>';

        // iterate by value: the shared static table must not be touched by a dangling reference
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5414
5415
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character, the unchanged character
     *                if it is ASCII and ASCII chars are kept, or an empty string for empty input.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // the ASCII check only runs when the caller asked to keep ASCII chars
        $keep_as_is = $keep_ascii_chars && ASCII::is_ascii($char);

        return $keep_as_is
            ? $char
            : '&#' . self::ord($char, $encoding) . ';';
    }
5448
5449
    /**
     * Replaces every run of $tab_length consecutive spaces with a single tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // one "tab" worth of spaces for the requested width
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5469
5470
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);

        // drop leading dashes / underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the native "mb_" functions are only used when no language specific handling is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-cases one matched chunk, either via "mb_strtoupper()" or via self::strtoupper().
         *
         * @param string $chunk
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($chunk)
                : \mb_strtoupper($chunk, $encoding);
        };

        // 1.) remove the separators and upper-case the character that follows them (if there is one)
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // 2.) upper-case the character that follows a run of digits (the digits are part of the match)
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5563
5564
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the capitalization helper runs
     * once with " " and once with "-" as delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $space_separated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_separated, '-');
    }
5585
5586
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        if (\PHP_VERSION_ID < 80000) {
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
5616
5617
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty haystack, an empty list of needles or an empty needle always
     * results in false. The string "0" is a regular needle.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains all $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // empty needles can never be "contained"; "0" is falsy in PHP but still a valid needle
            if ($needle !== '0' && !$needle) {
                return false;
            }

            // run exactly one search per needle, matching the requested case-sensitivity
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5654
5655
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped. The string "0" is a regular needle.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains any of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // skip empty needles; "0" is falsy in PHP but still a valid needle
            if ($needle !== '0' && !$needle) {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5698
5699
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is str_delimit() with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5715
5716
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // 1.) mark every uppercase letter that is not at a word boundary with a leading "-"
        if ($has_mbstring) {
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // 2.) lowercase the string, the delimiter is inserted afterwards and stays untouched
        if ($lang === null && !$try_to_keep_the_string_length && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // 3.) replace every run of dashes, underscores and whitespace with the delimiter
        if ($has_mbstring) {
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5767
5768
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        $strict_binary_check = !self::string_has_bom($str);
        if (self::is_binary($str, $strict_binary_check)) {
            $utf32_result = self::is_utf32($str, false);
            if ($utf32_result === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_result === 2) {
                return 'UTF-32BE';
            }

            $utf16_result = self::is_utf16($str, false);
            if ($utf16_result === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_result === 2) {
                return 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidate_encodings = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidate_encodings, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            // lazy-load the list of known encodings
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $iconv_encoding) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($iconv_encoding, $iconv_encoding . '//IGNORE', $str);

            // the first encoding that leaves the string unchanged wins
            if ($round_trip === $str) {
                return $iconv_encoding;
            }
        }

        return false;
    }
5898
5899
    /**
     * Check if the string ends with the given substring.
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string ...
        if ($needle === '') {
            return true;
        }

        // ... but an empty string cannot end with a non-empty needle
        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
5931
5932
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty substring matches every string, like it does in str_ends_with()
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $substring) {
            // "-0" is the same as "0", so substr() would return the whole string for an
            // empty substring; therefore the empty substring is handled explicitly
            if (
                $substring === ''
                ||
                \substr($str, -\strlen($substring)) === $substring
            ) {
                return true;
            }
        }

        return false;
    }
5959
5960
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // byte-wise prefix comparison, an empty $substring never counts as "present"
        $has_prefix = $substring !== ''
                      &&
                      \strncmp($str, $substring, \strlen($substring)) === 0;

        return $has_prefix ? $str : $substring . $str;
    }
5983
5984
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // byte-wise suffix comparison, only done when both strings are non-empty
        $has_suffix = $str !== ''
                      &&
                      $substring !== ''
                      &&
                      \substr($str, -\strlen($substring)) === $substring;

        if ($has_suffix) {
            return $str;
        }

        return $str . $substring;
    }
6008
6009
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * Every occurrence of '_id' is removed before the remaining underscores
     * are turned into spaces; the result is trimmed before the first character
     * is upper-cased.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        $without_id = \str_replace('_id', '', $str);
        $with_spaces = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($with_spaces));
    }
6035
6036
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string ...
        if ($needle === '') {
            return true;
        }

        // ... but an empty string cannot end with a non-empty needle
        if ($haystack === '') {
            return false;
        }

        // compare the last strlen($needle) bytes of the haystack with the needle
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6063
6064
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        // no candidate matched (or the list was empty)
        return false;
    }
6091
6092
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The string is returned unchanged if $index is greater than its length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // fast path: use the native "mb_" functions directly
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
6133
6134
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * INFO: The replacement is always inserted literally, "$1" or "\1" are not
     * treated as back-references.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements.</p>
     *
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        $search = (array) $search;

        // turn every needle into a case-insensitive, UTF-8 aware regex
        foreach ($search as $key => $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // an empty needle must never match: this pattern cannot match anything
                $search[$key] = '/^(?<=.)$/';
            } else {
                $search[$key] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // "preg_replace()" interprets "$n" and "\n" in the replacement as back-references,
        // so "$" and "\" are escaped to get the literal behaviour of "str_ireplace()"
        $escape_replacement = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        if (\is_array($replacement)) {
            $replacement = \array_map($escape_replacement, $replacement);
        } else {
            $replacement = $escape_replacement($replacement);
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($search, $replacement, $subject, -1, $count);

        return $subject;
    }
6201
6202
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * - case-insensitive
     * - the comparison is done on UTF-8 characters, so non-ASCII letters
     *   are matched case-insensitively as well
     *
     * EXAMPLE: <code>UTF8::str_ireplace_beginning('ΚόσμεMiddle', 'κόσμε', 'foo'); // 'fooMiddle'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement; $str unchanged if it does not start with $search.</p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            // An empty needle appends the replacement (kept for backward compatibility).
            return $str . $replacement;
        }

        // Compare the leading characters (not bytes) case-insensitively;
        // strncasecmp() would only fold ASCII letters.
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        $prefix = (string) \mb_substr($str, 0, $search_length, 'UTF-8');

        if (\mb_strtolower($prefix, 'UTF-8') === \mb_strtolower($search, 'UTF-8')) {
            return $replacement . \mb_substr($str, $search_length, null, 'UTF-8');
        }

        return $str;
    }
6237
6238
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * - case-insensitive
     * - the comparison is done on UTF-8 characters, so non-ASCII letters
     *   are matched case-insensitively as well
     *
     * EXAMPLE: <code>UTF8::str_ireplace_ending('MiddleΚόσμε', 'κόσμε', 'foo'); // 'Middlefoo'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement; $str unchanged if it does not end with $search.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        // A needle that is longer than the haystack can never be its suffix.
        // (Searching with an out-of-range negative offset would throw on PHP 8.)
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        if ($search_length > (int) \mb_strlen($str, 'UTF-8')) {
            return $str;
        }

        // Compare the trailing characters (not bytes) case-insensitively.
        $suffix = (string) \mb_substr($str, -$search_length, null, 'UTF-8');
        if (\mb_strtolower($suffix, 'UTF-8') === \mb_strtolower($search, 'UTF-8')) {
            return ((string) \mb_substr($str, 0, -$search_length, 'UTF-8')) . $replacement;
        }

        return $str;
    }
6272
6273
    /**
     * Tells whether $haystack begins with $needle, ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>True for an empty $needle, false for an empty $haystack with a non-empty $needle.</p>
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // Every string (even an empty one) starts with the empty string.
        if ($needle === '') {
            return true;
        }

        // A non-empty needle cannot be located in an empty haystack;
        // otherwise the needle has to be found at position 0.
        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
6300
6301
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the $substrings;
     *              always false for an empty $str or an empty list.</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: nothing is modified, so a by-reference loop would
        // only force a copy of the array and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6332
6333
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * - case-insensitive
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The part after the separator, or an empty string if
     *                $str or $separator is empty or the separator is not found.</p>
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the separator with the same encoding that is used below to
        // cut the string, so that offset and length are counted consistently
        // (same argument layout as the self::strripos() call in
        // str_isubstr_before_last_separator()).
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6372
6373
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * - case-insensitive
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The part after the separator, or an empty string if
     *                $str or $separator is empty or the separator is not found.</p>
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the requested encoding so that the offset matches the
        // character counting used for the cut below.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6412
6413
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * - case-insensitive
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The part before the separator, or an empty string if
     *                $str or $separator is empty or the separator is not found.</p>
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the requested encoding so that the offset matches the
        // character counting used for the cut below (same argument layout as
        // the self::strripos() call in str_isubstr_before_last_separator()).
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6444
6445
    /**
     * Returns everything in front of the last (case-insensitive) occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The part before the separator, or an empty string if
     *                $str or $separator is empty or the separator is not found.</p>
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        // UTF-8 is served by the native mbstring functions, every other
        // encoding goes through the class' own wrappers.
        $use_mbstring = $encoding === 'UTF-8';

        $position = $use_mbstring
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        $head = $use_mbstring
            ? \mb_substr($str, 0, $position)
            : self::substr($str, 0, $position, $encoding);

        return (string) $head;
    }
6481
6482
    /**
     * Returns the part of the string after (or, with "$before_needle", before)
     * the first occurrence of "$needle".
     *
     * Thin wrapper around self::stristr() that maps "not found" to an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The result of self::stristr(), or an empty string if
     *                $str or $needle is empty or self::stristr() returned false.</p>
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6520
6521
    /**
     * Returns the part of the string after (or, with "$before_needle", before)
     * the last occurrence of "$needle".
     *
     * Thin wrapper around self::strrichr() that maps "not found" to an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The result of self::strrichr(), or an empty string if
     *                $str or $needle is empty or self::strrichr() returned false.</p>
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6559
6560
    /**
     * Extracts the trailing $n characters of a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The last $n characters; an empty string if $str is empty or $n is not positive.</p>
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // Anything but the literal 'UTF-8' is normalized first and handled
        // by the class' own substr() implementation.
        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized_encoding);
        }

        return (string) \mb_substr($str, -$n);
    }
6588
6589
    /**
     * Limit the number of characters in a string.
     *
     * If the string is longer than $length it is cut so that the kept part
     * plus $str_add_on is $length characters long. If $str_add_on alone is
     * already longer than $length, only $str_add_on is returned.
     *
     * EXAMPLE: <code>UTF8::str_limit('fòô bàř fòô', 8, '…'); // 'fòô bàř…'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The (possibly shortened) string; an empty string if $str is empty or $length is not positive.</p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never pass a negative length to substr: that would mean
            // "everything except the last n characters" and return a string
            // that is even longer than the requested limit.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the input string.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6628
6629
    /**
     * Shortens a string to at most $length characters without cutting a word in half.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The (possibly shortened) string; an empty string if $str is empty or $length is not positive.</p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        $use_mbstring = $encoding === 'UTF-8';

        if ($use_mbstring) {
            // Short enough: nothing to do.
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // The limit falls exactly on a space: cut right before it.
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $str = \mb_substr($str, 0, $length);
        } else {
            if ($length >= (int) self::strlen($str, $encoding)) {
                return $str;
            }

            if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = self::substr($str, 0, $length, $encoding);
            if ($str === false) {
                return $str_add_on;
            }
        }

        // Drop the last (possibly truncated) word of the shortened string.
        $whole_words = \implode(' ', \explode(' ', $str, -1));
        if ($whole_words !== '') {
            return $whole_words . $str_add_on;
        }

        // There was only a single word: fall back to a hard cut.
        $hard_cut = $use_mbstring
            ? \mb_substr($str, 0, $length - 1)
            : self::substr($str, 0, $length - 1, $encoding);

        return ((string) $hard_cut) . $str_add_on;
    }
6695
6696
    /**
     * Determines the longest prefix that $str1 and $str2 have in common.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared prefix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // Only the overlap of both strings needs to be compared.
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            $position = 0;
            while ($position < $limit) {
                $left = \mb_substr($str1, $position, 1);

                // Stop at the first position where the characters differ.
                if ($left === false || $left !== \mb_substr($str2, $position, 1)) {
                    break;
                }

                $prefix .= $left;
                ++$position;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $position = 0;
        while ($position < $limit) {
            $left = self::substr($str1, $position, 1, $encoding);

            if ($left === false || $left !== self::substr($str2, $position, 1, $encoding)) {
                break;
            }

            $prefix .= $left;
            ++$position;
        }

        return $prefix;
    }
6759
6760
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * EXAMPLE: <code>UTF8::str_longest_common_substring('foobar', 'xbary'); // 'bar'</code>
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Evaluated after the normalization above, exactly like the
        // character extraction of the original implementation.
        $use_mbstring = $encoding === 'UTF-8';

        // Extract the characters of the second string only once instead of
        // once per cell of the comparison table.
        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[$j] = $use_mbstring
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match so far
        $end = 0; // 1-based end position of the best match in $str1

        // Only the previous row of the table is ever read, so two rows suffice.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            // The character of the first string is constant for the whole row.
            $str_char = $use_mbstring
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            $current_row = [0];
            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;
                    // Strict ">" keeps the first match in case of ties.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($use_mbstring) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6850
6851
    /**
     * Determines the longest suffix that $str1 and $str2 have in common.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared suffix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str2 === '' || $str1 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            // Only the overlap of both strings needs to be compared.
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Walk backwards from the end of both strings.
            $distance = 1;
            while ($distance <= $limit) {
                $right = \mb_substr($str1, -$distance, 1);

                if ($right === false || $right !== \mb_substr($str2, -$distance, 1)) {
                    break;
                }

                $suffix = $right . $suffix;
                ++$distance;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $distance = 1;
        while ($distance <= $limit) {
            $right = self::substr($str1, -$distance, 1, $encoding);

            if ($right === false || $right !== self::substr($str2, -$distance, 1, $encoding)) {
                break;
            }

            $suffix = $right . $suffix;
            ++$distance;
        }

        return $suffix;
    }
6917
6918
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is given without delimiters and modifiers; it is evaluated
     * as a UTF-8 ("u") regular expression. Forward slashes may be written
     * escaped ("\/") or unescaped ("/").
     *
     * EXAMPLE: <code>UTF8::str_matches_pattern('fòô', '^f.ô$'); // true</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // The pattern gets wrapped into "/" delimiters, so every "/" that is
        // not already escaped (i.e. preceded by an even number of backslashes)
        // has to be escaped, otherwise it would terminate the regex early.
        $escaped_pattern = \preg_replace('#(?<!\\\\)(?:\\\\\\\\)*\K/#', '\\\\/', $pattern);
        if ($escaped_pattern === null) {
            // Escaping failed (e.g. invalid input); use the pattern as given.
            $escaped_pattern = $pattern;
        }

        return (bool) \preg_match('/' . $escaped_pattern . '/u', $str);
    }
6933
6934
    /**
     * Checks whether a character exists at the given index. A negative
     * offset counts from the last character of the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Negative offsets reach back at most "length" characters,
        // non-negative offsets must be smaller than the length.
        return $offset < 0
            ? $char_count >= \abs($offset)
            : $char_count > $offset;
    }
6959
6960
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Validate the index with the same encoding that char_at() uses below;
        // str_offset_exists() implements exactly the bounds check needed here.
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6992
6993
    /**
6994
     * Pad a UTF-8 string to a given length with another string.
6995
     *
6996
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
6997
     *
6998
     * @param string     $str        <p>The input string.</p>
6999
     * @param int        $pad_length <p>The length of return string.</p>
7000
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
7001
     * @param int|string $pad_type   [optional] <p>
7002
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
7003
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
7004
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
7005
     *                               </p>
7006
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
7007
     *
7008
     * @psalm-pure
7009
     *
7010
     * @return string
7011
     *                <p>Returns the padded string.</p>
7012
     */
7013 41
    public static function str_pad(
7014
        string $str,
7015
        int $pad_length,
7016
        string $pad_string = ' ',
7017
        $pad_type = \STR_PAD_RIGHT,
7018
        string $encoding = 'UTF-8'
7019
    ): string {
7020 41
        if ($pad_length === 0 || $pad_string === '') {
7021 1
            return $str;
7022
        }
7023
7024 41
        if ($pad_type !== (int) $pad_type) {
7025 13
            if ($pad_type === 'left') {
7026 3
                $pad_type = \STR_PAD_LEFT;
7027 10
            } elseif ($pad_type === 'right') {
7028 6
                $pad_type = \STR_PAD_RIGHT;
7029 4
            } elseif ($pad_type === 'both') {
7030 3
                $pad_type = \STR_PAD_BOTH;
7031
            } else {
7032 1
                throw new \InvalidArgumentException(
7033 1
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
7034
                );
7035
            }
7036
        }
7037
7038 40
        if ($encoding === 'UTF-8') {
7039 25
            $str_length = (int) \mb_strlen($str);
7040
7041 25
            if ($pad_length >= $str_length) {
7042 25
                switch ($pad_type) {
7043
                    case \STR_PAD_LEFT:
7044 8
                        $ps_length = (int) \mb_strlen($pad_string);
7045
7046 8
                        $diff = ($pad_length - $str_length);
7047
7048 8
                        $pre = (string) \mb_substr(
7049 8
                            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7050 8
                            0,
7051 8
                            $diff
7052
                        );
7053 8
                        $post = '';
7054
7055 8
                        break;
7056
7057
                    case \STR_PAD_BOTH:
7058 14
                        $diff = ($pad_length - $str_length);
7059
7060 14
                        $ps_length_left = (int) \floor($diff / 2);
7061
7062 14
                        $ps_length_right = (int) \ceil($diff / 2);
7063
7064 14
                        $pre = (string) \mb_substr(
7065 14
                            \str_repeat($pad_string, $ps_length_left),
7066 14
                            0,
7067 14
                            $ps_length_left
7068
                        );
7069 14
                        $post = (string) \mb_substr(
7070 14
                            \str_repeat($pad_string, $ps_length_right),
7071 14
                            0,
7072 14
                            $ps_length_right
7073
                        );
7074
7075 14
                        break;
7076
7077
                    case \STR_PAD_RIGHT:
7078
                    default:
7079 9
                        $ps_length = (int) \mb_strlen($pad_string);
7080
7081 9
                        $diff = ($pad_length - $str_length);
7082
7083 9
                        $post = (string) \mb_substr(
7084 9
                            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7085 9
                            0,
7086 9
                            $diff
7087
                        );
7088 9
                        $pre = '';
7089
                }
7090
7091 25
                return $pre . $str . $post;
7092
            }
7093
7094 3
            return $str;
7095
        }
7096
7097 15
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
7098
7099 15
        $str_length = (int) self::strlen($str, $encoding);
7100
7101 15
        if ($pad_length >= $str_length) {
7102 14
            switch ($pad_type) {
7103
                case \STR_PAD_LEFT:
7104 5
                    $ps_length = (int) self::strlen($pad_string, $encoding);
7105
7106 5
                    $diff = ($pad_length - $str_length);
7107
7108 5
                    $pre = (string) self::substr(
7109 5
                        \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7110 5
                        0,
7111
                        $diff,
7112
                        $encoding
7113
                    );
7114 5
                    $post = '';
7115
7116 5
                    break;
7117
7118
                case \STR_PAD_BOTH:
7119 3
                    $diff = ($pad_length - $str_length);
7120
7121 3
                    $ps_length_left = (int) \floor($diff / 2);
7122
7123 3
                    $ps_length_right = (int) \ceil($diff / 2);
7124
7125 3
                    $pre = (string) self::substr(
7126 3
                        \str_repeat($pad_string, $ps_length_left),
7127 3
                        0,
7128
                        $ps_length_left,
7129
                        $encoding
7130
                    );
7131 3
                    $post = (string) self::substr(
7132 3
                        \str_repeat($pad_string, $ps_length_right),
7133 3
                        0,
7134
                        $ps_length_right,
7135
                        $encoding
7136
                    );
7137
7138 3
                    break;
7139
7140
                case \STR_PAD_RIGHT:
7141
                default:
7142 6
                    $ps_length = (int) self::strlen($pad_string, $encoding);
7143
7144 6
                    $diff = ($pad_length - $str_length);
7145
7146 6
                    $post = (string) self::substr(
7147 6
                        \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7148 6
                        0,
7149
                        $diff,
7150
                        $encoding
7151
                    );
7152 6
                    $pre = '';
7153
            }
7154
7155 14
            return $pre . $str . $post;
7156
        }
7157
7158 1
        return $str;
7159
    }
7160
7161
    /**
     * Pads a string on both sides until it reaches the requested length.
     *
     * Thin wrapper around "UTF8::str_pad()" that always passes \STR_PAD_BOTH as pad type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7189
7190
    /**
     * Pads the beginning of a string until it reaches the requested length.
     *
     * Thin wrapper around "UTF8::str_pad()" that always passes \STR_PAD_LEFT as pad type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7218
7219
    /**
     * Pads the end of a string until it reaches the requested length.
     *
     * Thin wrapper around "UTF8::str_pad()" that always passes \STR_PAD_RIGHT as pad type.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7247
7248
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" first, the filtered result
     * is then handed to the native "str_repeat()".
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>The string to be repeated.</p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7277
7278
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replaces every occurrence of the search value with the replacement value.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The needle to look for. Pass an array to search for
     *                                 several needles at once.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The value that replaces each found needle. Pass an array
     *                                 to provide several replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The haystack: a string or an array of strings.
     *                                 </p>
     *                                 <p>
     *                                 If an array is given, every entry is processed and an
     *                                 array is returned.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, receives the number of replacements performed.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7337
7338
    /**
     * Replaces $search with $replacement if, and only if, $str starts with $search.
     *
     * The comparison is byte-wise and case-sensitive. If $search is empty,
     * $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle matches nothing specific: the replacement is simply appended
        // (this also covers an empty $str, where the result is just $replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_length = \strlen($search);

        // No prefix match (including an empty $str): hand the input back untouched.
        if (\strncmp($str, $search, $prefix_length) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $prefix_length);
    }
7376
7377
    /**
     * Replaces $search with $replacement if, and only if, $str ends with $search.
     *
     * The comparison is byte-wise and case-sensitive. If $search is empty,
     * $replacement is appended to $str. If $search is longer than $str,
     * $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle matches nothing specific: the replacement is simply appended
        // (this also covers an empty $str, where the result is just $replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // A needle longer than the haystack can never be a suffix. Checking this first
        // avoids handing an out-of-range negative offset to a native string function,
        // which raises a ValueError on PHP 8 (and a warning on PHP 7).
        if ($search_length > \strlen($str)) {
            return $str;
        }

        if (\substr($str, -$search_length) !== $search) {
            return $str;
        }

        return \substr($str, 0, -$search_length) . $replacement;
    }
7414
7415
    /**
     * Replace only the first occurrence of "$search" in "$subject" with "$replace".
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search  <p>The value to look for.</p>
     * @param string $replace <p>The replacement value.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_position = self::strpos($subject, $search);

        if ($first_position === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $first_position,
            (int) self::strlen($search)
        );
    }
7449
7450
    /**
     * Replace only the last occurrence of "$search" in "$subject" with "$replace".
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search  <p>The value to look for.</p>
     * @param string $replace <p>The replacement value.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_position = self::strrpos($subject, $search);

        if ($last_position === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $last_position,
            (int) self::strlen($search)
        );
    }
7483
7484
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: relies on the native "shuffle()", which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // The literal 'UTF-8' takes the direct "mb_*" route, everything else goes
        // through the encoding-aware helpers of this class.
        $use_native_mb = $encoding === 'UTF-8';

        if ($use_native_mb) {
            $char_count = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $char_count = (int) self::strlen($str, $encoding);
        }

        // Shuffle the character positions, then rebuild the string in that order.
        $positions = \range(0, $char_count - 1);
        \shuffle($positions);

        $shuffled_str = '';
        foreach ($positions as $position) {
            $char = $use_native_mb
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($char !== false) {
                $shuffled_str .= $char;
            }
        }

        return $shuffled_str;
    }
7531
7532
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * If the (resolved) $end lies at or before a non-negative $start that is
     * inside the string, an empty string is returned.
     *
     * NOTE: a negative $start is forwarded as-is to the underlying substring
     * function and the length is still computed as a plain difference.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> may be returned by the underlying
     *                      substring function.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // The literal 'UTF-8' takes the direct "mb_*" route, everything else goes
        // through the encoding-aware helpers of this class.
        $use_native_mb = $encoding === 'UTF-8';
        if (!$use_native_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end !== null && $end >= 0) {
            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        } else {
            $str_length = $use_native_mb
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            if ($end === null) {
                // "up to the end": the full length is always enough
                $length = $str_length;
            } else {
                // negative $end counts from the end of the string
                $length = $str_length + $end - $start;

                // The resolved end is at or before $start: nothing to extract.
                // Without this guard a negative $length would be interpreted as
                // "omit N characters from the end" and could return characters.
                if ($start >= 0 && $start < $str_length && $length <= 0) {
                    return '';
                }
            }
        }

        if ($use_native_mb) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7583
7584
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized via "UTF8::normalize_whitespace()", "-" becomes "_",
     * every number character / uppercase letter is prefixed with "_" (uppercase
     * letters are lowercased, plain ASCII digits are additionally suffixed with "_"),
     * whitespace runs become "_", repeated "_" are collapsed and leading / trailing
     * "_" and whitespace are trimmed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // Number characters and uppercase letters only. Inside a character class
            // a "|" is a literal pipe, not an alternation, so it must not be listed
            // here - otherwise every "|" in the input would be prefixed with "_".
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // plain ASCII digit: isolate it with "_" on both sides
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert whitespace runs to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing whitespace (none is left after the first pattern)
                '/_+/',                 // collapse repeated "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7653
7654
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        /** @var int[] $code_points */
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicated values
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7685
7686
    /**
     * Split every entry of an array into an array of Unicode characters.
     *
     * Each entry is handed to "UTF8::str_split()" with the given options; the
     * keys of the input array are kept.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        /** @var string[][] $chunks */
        $chunks = \array_map(
            static function ($entry) use ($length, $clean_utf8, $try_to_use_mb_functions): array {
                return self::str_split(
                    $entry,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            },
            $input
        );

        return $chunks;
    }
7724
7725
    /**
     * Convert a string to an array of unicode characters.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * Behaviour visible in this method:
     * - a $length of 0 or less, or an empty input, yields an empty array
     * - for backwards compatibility an array is still accepted as $input; it is
     *   forwarded to "UTF8::str_split_array()" and the result is then an array of arrays
     * - depending on the detected support, the split is done via "mb_str_split()",
     *   "mb_substr()" / "preg_match_all()" or a manual byte-wise UTF-8 decoder
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input.</p>
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // a non-positive chunk length cannot produce any chunk
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback: arrays are delegated to "str_split_array()"
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /** @psalm-suppress InvalidReturnStatement */
            /** @phpstan-ignore-next-line - old code :/ */
            return self::str_split_array(
                $input,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        // init: from here on we only deal with a string
        $input = (string) $input;

        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if (
            $try_to_use_mb_functions
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            // preferred path: "mb_str_split()" handles $length itself, so its
            // result is returned directly
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $return = \mb_str_split($input, $length);
                if ($return !== false) {
                    return $return;
                }
            }

            // no usable "mb_str_split()": collect single characters, one
            // "mb_substr()" call per character for short inputs, a single
            // regex match for longer ones
            $i_max = \mb_strlen($input);
            if ($i_max <= 127) {
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($input, $i, 1);
                }
            } else {
                $return_array = [];
                \preg_match_all('/./us', $input, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            // no mbstring (or not wanted): let PCRE in UTF-8 mode match every character
            $return_array = [];
            \preg_match_all('/./us', $input, $return_array);
            $ret = $return_array[0] ?? [];
        } else {

            // fallback: decode the UTF-8 byte sequences by hand; the lead byte is
            // classified via bit masks and continuation bytes must match 10xxxxxx,
            // a lead byte without valid continuation bytes is not added to the result

            $ret = [];
            $len = \strlen($input);

            for ($i = 0; $i < $len; ++$i) {
                // 0xxxxxxx: single byte (ASCII)
                if (($input[$i] & "\x80") === "\x00") {
                    $ret[] = $input[$i];
                } elseif (
                    // 110xxxxx: lead byte of a two byte sequence
                    isset($input[$i + 1])
                    &&
                    ($input[$i] & "\xE0") === "\xC0"
                ) {
                    if (($input[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $input[$i] . $input[$i + 1];

                        // skip the consumed continuation byte
                        ++$i;
                    }
                } elseif (
                    // 1110xxxx: lead byte of a three byte sequence
                    isset($input[$i + 2])
                    &&
                    ($input[$i] & "\xF0") === "\xE0"
                ) {
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2];

                        // skip the consumed continuation bytes
                        $i += 2;
                    }
                } elseif (
                    // 11110xxx: lead byte of a four byte sequence
                    isset($input[$i + 3])
                    &&
                    ($input[$i] & "\xF8") === "\xF0"
                ) {
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];

                        // skip the consumed continuation bytes
                        $i += 3;
                    }
                }
            }
        }

        // the paths above produced single characters: group them into chunks of $length
        if ($length > 1) {
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        // a result that starts with an empty string is reported as "no characters"
        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7878
7879
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * With mbstring support the split is done by "mb_split()", otherwise by
     * "preg_split()" in UTF-8 mode; in both cases $pattern is used as a
     * regular expression (without delimiters).
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings. An empty array is returned if $limit is 0
     *                  or if the underlying split function fails.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        // an empty pattern cannot split anything
        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $parts = \mb_split($pattern, $str);
            if ($parts === false) {
                return [];
            }

            // "mb_split()" is asked for every part; a positive limit simply
            // truncates the list afterwards
            if ($limit > 0) {
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        // "preg_split()" keeps the unsplit rest in the last element when a limit is
        // given, so ask for one element more and drop that rest afterwards
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // Use the pattern as a regular expression, like the "mb_split()" path does.
        // Pick a delimiter that does not occur in the pattern so nothing has to be escaped.
        $delimiter = null;
        foreach (['/', '#', '~', '%', '@', '!'] as $candidate) {
            if (\strpos($pattern, $candidate) === false) {
                $delimiter = $candidate;

                break;
            }
        }

        if ($delimiter === null) {
            // no free delimiter available: fall back to matching the pattern literally
            $delimiter = '/';
            $pattern = \preg_quote($pattern, '/');
        }

        $array = \preg_split($delimiter . $pattern . $delimiter . 'u', $str, $limit);
        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7948
7949
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive; an empty $needle always matches.
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // before PHP 8 there is no native "str_starts_with()": compare the prefix by hand
        if (\PHP_VERSION_ID < 80000) {
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
7981
7982
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the $substrings;
     *              always false for an empty $str or an empty list.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        if ($substrings === []) {
            return false;
        }

        // iterate by value: nothing is written back, so a reference is not needed
        // (and would stay dangling after the loop)
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
8013
8014
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything behind the first separator, or an empty string if the
     *                separator was not found or one of the inputs is empty.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // fast path: plain mbstring calls with the default encoding
        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class wrapper (like the other "str_substr_*_separator" methods do)
        // instead of calling mb_substr() directly with the raw encoding name
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8055
8056
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything behind the last separator, or an empty string if the
     *                separator was not found or one of the inputs is empty.</p>
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // position of the last separator (native mbstring call for the default encoding)
        $offset = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, $offset + (int) \mb_strlen($separator));
        }

        $start = $offset + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
8100
8101
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything in front of the first separator, or an empty string if the
     *                separator was not found or one of the inputs is empty.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // position of the first separator (native mbstring call for the default encoding)
        $offset = $is_utf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
8146
8147
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything in front of the last separator, or an empty string if the
     *                separator was not found or one of the inputs is empty.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // position of the last separator (native mbstring call for the default encoding)
        $offset = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $offset);
        }

        // the encoding name is normalized only after the lookup, right before cutting
        return (string) self::substr(
            $str,
            0,
            $offset,
            self::normalize_encoding($encoding, 'UTF-8')
        );
    }
8191
8192
    /**
     * Gets the part of the string starting at (or, via "$before_needle", in front of)
     * the first occurrence of the "$needle".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The matching part, or an empty string if the needle was not found
     *                or one of the inputs is empty.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // default encoding -> native mbstring call, everything else -> class wrapper
        $part = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($part === false) {
            return '';
        }

        return $part;
    }
8238
8239
    /**
     * Gets the part of the string starting at (or, via "$before_needle", in front of)
     * the last occurrence of the "$needle".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The matching part, or an empty string if the needle was not found
     *                or one of the inputs is empty.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // default encoding -> native mbstring call, everything else -> class wrapper
        $part = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($part === false) {
            return '';
        }

        return $part;
    }
8285
8286
    /**
     * Wraps $str in the given substring (prefix and suffix are the same text).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
8301
8302
    /**
     * Returns a string with the first letter of each word capitalized and the
     * rest of each word lower-cased. The input is trimmed first, unless
     * "$use_trim_first" is disabled.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that are treated
     *                                                           like whitespace, i.e. that separate words.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are only used if no language specific handling was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // compare explicitly: a truthiness check would discard the valid separator "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        // a "word" is every run of chars that is neither whitespace nor one of the extra separators
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are kept exactly as they are (case-sensitive comparison)
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8397
8398
    /**
     * Convert a string into an obfuscated string: randomly chosen chars are replaced
     * by "$obfuscateChar" until roughly "$percent" of the chars were processed.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the chars that should be processed, e.g. 0.5 === 50%.
     *                                Values above 1 behave like 1.</p>
     * @param string   $obfuscateChar <p>The replacement char.</p>
     * @param string[] $keepChars     <p>Chars that are never replaced; a selected "keep char" still
     *                                counts towards the processed share.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscated string (random, i.e. different on every call).</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // Park already existing "$obfuscateChar" occurrences behind a placeholder,
        // they are restored at the very end.
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);

        // Only chars that differ from "$obfuscateChar" can ever be selected by the loop below.
        $charsSelectable = 0;
        foreach ($chars as $char) {
            if ($char !== $obfuscateChar) {
                ++$charsSelectable;
            }
        }

        // Never ask for more chars than can be selected: without this cap a "$percent" above 1
        // makes the target unreachable and the "while" loop below would spin forever.
        $charsMaxChange = \min($charsSelectable, \round($charsMax * $percent));
        $charsCounter = 0;
        $charKeyDone = [];

        // Repeat random passes over the string until enough chars were processed.
        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // skip roughly every second candidate
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // "keep chars" are counted but stay readable
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
8464
8465
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize. The entries are appended to the
     *                         built-in list of "small words" and end up unescaped inside the regexes.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // built-in "small words" (regex fragments) followed by the caller supplied ones
        $small_words = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $small_words_rx = \implode('|', $small_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // input without any lower-case char (e.g. all caps) is lower-cased first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // shared callback: upper-case the first char of the small word in group 1
        $capitalize_word = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        // shared callback: keep the prefix in group 1, upper-case the first char of the small word in group 2
        $capitalize_word_after_prefix = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // URLs, domains, emails and file paths stay untouched
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // small words are lower-cased
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // words without internal caps get a capital first letter
                    $word = static::ucfirst($matches[4], $encoding);
                } else {
                    // every other kind of word is preserved (iPhone)
                    $word = $matches[5];
                }

                // leading and trailing underscores are preserved
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $capitalize_word_after_prefix,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $capitalize_word,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $capitalize_word,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        //
        // NOTE(review): the lookbehind in the pattern below tests for an ellipsis char ("…"),
        // although its inline comment talks about a hyphen - confirm which one is intended.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $capitalize_word_after_prefix,
            $str
        );

        return $str;
    }
8655
8656
    /**
     * Get a binary representation of a specific string: the bits of all bytes,
     * concatenated, without leading zeros.
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The bit string ("0" for an empty input). "false" is kept in the
     *                      signature for backward compatibility only, this implementation
     *                      never returns it.</p>
     */
    public static function str_to_binary(string $str)
    {
        // Build the bit string byte by byte: converting the whole value at once via
        // base_convert() goes through a float for long inputs and loses precision.
        $bits = '';
        $byte_count = \strlen($str);
        for ($i = 0; $i < $byte_count; ++$i) {
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // same output format as before: a plain number without leading zeros
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8679
8680
    /**
     * Split a string into its lines; every run of one or two "\r" / "\n" chars
     * counts as one line break.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "nothing to split" (empty input or a failed split)
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        // only filter the lines if one of the filters was requested
        if ($remove_empty_values || $remove_short_values !== null) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }
8719
8720
    /**
     * Convert a string into an array of words; the text between the words
     * is returned as separate elements.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // character class for "word chars": letters plus the extra chars from the caller
        $char_list = self::rxClass($char_list, '\pL');

        // the words are captured as delimiters, so they are part of the result
        $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $remove_empty_values ? [] : [''];
        }

        if (!$remove_empty_values && $remove_short_values === null) {
            return $parts;
        }

        $filtered = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every remaining value is a string
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $filtered
        );
    }
8771
8772
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring alone is already longer than the desired length, only
     * the substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // short enough: nothing is cut and nothing is appended
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $keep = $length - (int) \mb_strlen($substring);

                // A substring longer than the desired length must not turn into a negative
                // length: that would cut from the END of the string and keep nearly all of it.
                // (A negative "$length" passed in by the caller is left alone.)
                if ($length >= 0 && $keep < 0) {
                    $keep = 0;
                }

                /** @noinspection UnnecessaryCastingInspection */
                return (string) \mb_substr($str, 0, $keep) . $substring;
            }

            return (string) \mb_substr($str, 0, $length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        $keep = $length;
        if ($substring !== '') {
            $keep -= (int) self::strlen($substring, $encoding);

            // same guard as in the UTF-8 branch above
            if ($length >= 0 && $keep < 0) {
                $keep = 0;
            }
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $keep,
                   $encoding
               )
               ) . $substring;
    }
8831
8832
    /**
8833
     * Truncates the string to a given length, while ensuring that it does not
8834
     * split words. If $substring is provided, and truncating occurs, the
8835
     * string is further truncated so that the substring may be appended without
8836
     * exceeding the desired length.
8837
     *
8838
     * @param string $str
8839
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
8840
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
8841
     *                                                       Default:
8842
     *                                                       ''</p>
8843
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
8844
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
8845
     *
8846
     * @psalm-pure
8847
     *
8848
     * @return string
8849
     *                <p>A string after truncating.</p>
8850
     */
8851 47
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        // An empty input or a non-positive budget yields only the suffix.
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        // The default encoding talks to mbstring directly, every other
        // encoding goes through the wrappers of this class.
        $use_mb = ($encoding === 'UTF-8');
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $use_mb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($str_length <= $length) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= $use_mb
            ? (int) \mb_strlen($substring)
            : (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $truncated - needed for PhpStan (stubs error) */
        $truncated = $use_mb
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // Position of the first space at or after the last kept character.
        $space_position = $use_mb
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);

        // The cut sits right in front of a space: no word was split.
        if ($space_position === $length) {
            return $truncated . $substring;
        }

        // Otherwise fall back to the last space inside the truncated part.
        $last_position = $use_mb
            ? \mb_strrpos($truncated, ' ', 0)
            : self::strrpos($truncated, ' ', 0, $encoding);

        // No space in the truncated part but one later on: the only (split)
        // word is dropped, unless the caller asked to keep it.
        $drop_single_word = $space_position !== false && !$ignore_do_not_split_words_for_one_word;

        if ($last_position !== false || $drop_single_word) {
            $truncated = $use_mb
                ? (string) \mb_substr($truncated, 0, (int) $last_position)
                : (string) self::substr($truncated, 0, (int) $last_position, $encoding);
        }

        return $truncated . $substring;
    }
8938
8939
    /**
8940
     * Returns a lowercase and trimmed string separated by underscores.
8941
     * Underscores are inserted before uppercase characters (with the exception
8942
     * of the first character of the string), and in place of spaces as well as
8943
     * dashes.
8944
     *
8945
     * @param string $str
8946
     *
8947
     * @psalm-pure
8948
     *
8949
     * @return string
8950
     *                <p>The underscored string.</p>
8951
     */
8952 16
    public static function str_underscored(string $str): string
    {
        // All the work is done by str_delimit(); only the delimiter is fixed here.
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8956
8957
    /**
8958
     * Returns an UpperCamelCase version of the supplied string. It trims
8959
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
8960
     * and underscores, and removes spaces, dashes, underscores.
8961
     *
8962
     * @param string      $str                           <p>The input string.</p>
8963
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
8964
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
8965
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
8966
     *                                                   tr</p>
8967
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
8968
     *                                                   -> ß</p>
8969
     *
8970
     * @psalm-pure
8971
     *
8972
     * @return string
8973
     *                <p>A string in UpperCamelCase.</p>
8974
     */
8975 13
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Step 1: camelize via str_camelize() (only $str and $encoding are forwarded).
        $camelized = self::str_camelize($str, $encoding);

        // Step 2: upper-case the first character; the remaining options apply here.
        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8984
8985
    /**
8986
     * Get the number of words in a specific string.
8987
     *
8988
     * EXAMPLES: <code>
8989
     * // format: 0 -> return only word count (int)
8990
     * //
8991
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
8992
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
8993
     *
8994
     * // format: 1 -> return words (array)
8995
     * //
8996
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
8997
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
8998
     *
8999
     * // format: 2 -> return words with offset (array)
9000
     * //
9001
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
9002
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
9003
     * </code>
9004
     *
9005
     * @param string $str       <p>The input string.</p>
9006
     * @param int    $format    [optional] <p>
9007
     *                          <strong>0</strong> => return a number of words (default)<br>
9008
     *                          <strong>1</strong> => return an array of words<br>
9009
     *                          <strong>2</strong> => return an array of words with word-offset as key
9010
     *                          </p>
9011
     * @param string $char_list [optional] <p>Additional chars that are treated as part of a word and do not start a new word.</p>
9012
     *
9013
     * @psalm-pure
9014
     *
9015
     * @return int|string[]
9016
     *                      <p>The number of words in the string.</p>
9017
     */
9018 2
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // The loops below read the words from the odd indexes of this list;
        // the entries in between are measured as the gaps around the words.
        $parts = self::str_to_words($str, $char_list);
        $part_count = \count($parts);

        // Default format: only the number of words.
        if ($format !== 1 && $format !== 2) {
            return (int) (($part_count - 1) / 2);
        }

        $words = [];

        // Format 1: plain list of words.
        if ($format === 1) {
            for ($index = 1; $index < $part_count; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // Format 2: words keyed by their character offset in $str.
        $position = (int) self::strlen($parts[0]);
        for ($index = 1; $index < $part_count; $index += 2) {
            $words[$position] = $parts[$index];

            // Skip the word itself plus the gap that follows it.
            $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
        }

        return $words;
    }
9042
9043
    /**
9044
     * Case-insensitive string comparison.
9045
     *
9046
     * INFO: Case-insensitive version of UTF8::strcmp()
9047
     *
9048
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
9049
     *
9050
     * @param string $str1     <p>The first string.</p>
9051
     * @param string $str2     <p>The second string.</p>
9052
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9053
     *
9054
     * @psalm-pure
9055
     *
9056
     * @return int
9057
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9058
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
9059
     *             <strong>0</strong> if they are equal
9060
     */
9061 23
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands with identical options, then compare them
        // with the case-sensitive comparison of this class.
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_first, $folded_second);
    }
9085
9086
    /**
9087
     * Case-sensitive string comparison.
9088
     *
9089
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
9090
     *
9091
     * @param string $str1 <p>The first string.</p>
9092
     * @param string $str2 <p>The second string.</p>
9093
     *
9094
     * @psalm-pure
9095
     *
9096
     * @return int
9097
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9098
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9099
     *             <strong>0</strong> if they are equal
9100
     */
9101 29
    public static function strcmp(string $str1, string $str2): int
    {
        // Byte-identical strings need no normalization.
        if ($str1 === $str2) {
            return 0;
        }

        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() does not return a string when it fails
        // (e.g. for invalid UTF-8). Passing that on to \strcmp() would either
        // raise a TypeError or make all invalid strings compare as equal, so
        // the raw bytes are compared instead.
        if (!\is_string($normalized1)) {
            $normalized1 = $str1;
        }
        if (!\is_string($normalized2)) {
            $normalized2 = $str2;
        }

        return \strcmp($normalized1, $normalized2);
    }
9114
9115
    /**
9116
     * Find length of initial segment not matching mask.
9117
     *
9118
     * @param string   $str
9119
     * @param string   $char_list
9120
     * @param int      $offset
9121
     * @param int|null $length
9122
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9123
     *
9124
     * @psalm-pure
9125
     *
9126
     * @return int
9127
     */
9128 12
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Narrow $str to the requested window FIRST, so that every later
        // branch (including the empty-mask shortcut) measures the same
        // segment, as the native strcspn() does.
        if ($offset || $length !== null) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str_tmp = \mb_substr($str, $offset);
                } else {
                    $str_tmp = \mb_substr($str, $offset, $length);
                }
            } else {
                $str_tmp = self::substr($str, $offset, $length, $encoding);
            }

            if ($str_tmp === false) {
                return 0;
            }

            $str = $str_tmp;
        }

        if ($str === '') {
            return 0;
        }

        // An empty mask can never match: the whole segment counts.
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // Lazily capture everything in front of the first mask character.
        $matches = [];
        if (\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $matches)) {
            $return = self::strlen($matches[1], $encoding);

            return $return === false ? 0 : $return;
        }

        // No mask character found at all.
        return (int) self::strlen($str, $encoding);
    }
9177
9178
    /**
9179
     * Create a UTF-8 string from code points.
9180
     *
9181
     * INFO: opposite to UTF8::codepoints()
9182
     *
9183
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
9184
     *
9185
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
9186
     *
9187
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
9188
     *
9189
     * @psalm-pure
9190
     *
9191
     * @return string
9192
     *                <p>A UTF-8 encoded string.</p>
9193
     */
9194 4
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // Accept a single code point as well as a list of them.
        if (!\is_array($intOrHex)) {
            $intOrHex = [$intOrHex];
        }

        // Build one numeric HTML entity per code point ...
        $entities = '';
        foreach ($intOrHex as $strPart) {
            $entities .= '&#' . (int) $strPart . ';';
        }

        // ... and let mbstring decode them in one go.
        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        $decoded = \mb_convert_encoding($entities, 'UTF-8', 'HTML-ENTITIES');

        // mb_convert_encoding() is not guaranteed to return a string (it can
        // signal failure); never let a non-string hit the ": string" return type.
        return \is_string($decoded) ? $decoded : '';
    }
9213
9214
    /**
9215
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
9216
     *
9217
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
9218
     *
9219
     * @param string $str <p>The input string.</p>
9220
     *
9221
     * @psalm-pure
9222
     *
9223
     * @return bool
9224
     *              <p>
9225
     *              <strong>true</strong> if the string has BOM at the start,<br>
9226
     *              <strong>false</strong> otherwise
9227
     *              </p>
9228
     */
9229 43
    public static function string_has_bom(string $str): bool
    {
        // self::$BOM maps each BOM byte sequence to its length in bytes.
        // Iterate by value: nothing is modified here, and a by-reference loop
        // would needlessly turn the entries of the static table into references.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // Binary-safe comparison of the first $bom_byte_length bytes.
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9239
9240
    /**
9241
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
9242
     *
9243
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
9244
     *
9245
     * @see http://php.net/manual/en/function.strip-tags.php
9246
     *
9247
     * @param string      $str            <p>
9248
     *                                    The input string.
9249
     *                                    </p>
9250
     * @param string|null $allowable_tags [optional] <p>
9251
     *                                    You can use the optional second parameter to specify tags which should
9252
     *                                    not be stripped.
9253
     *                                    </p>
9254
     *                                    <p>
9255
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
9256
     *                                    can not be changed with allowable_tags.
9257
     *                                    </p>
9258
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
9259
     *
9260
     * @psalm-pure
9261
     *
9262
     * @return string
9263
     *                <p>The stripped string.</p>
9264
     */
9265 4
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // Optionally remove invalid UTF-8 before the tags are stripped.
        $input = $clean_utf8 ? self::clean($str) : $str;

        // Only forward the allow-list when the caller supplied one.
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9284
9285
    /**
9286
     * Strip all whitespace characters. This includes tabs and newline
9287
     * characters, as well as multibyte whitespace such as the thin space
9288
     * and ideographic space.
9289
     *
9290
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
9291
     *
9292
     * @param string $str
9293
     *
9294
     * @psalm-pure
9295
     *
9296
     * @return string
9297
     */
9298 36
    public static function strip_whitespace(string $str): string
    {
        // Nothing to strip in an empty string, so the regex is skipped.
        if ($str !== '') {
            // Remove every run of whitespace characters.
            $str = (string) \preg_replace('/[[:space:]]+/u', '', $str);
        }

        return $str;
    }
9306
9307
    /**
9308
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
9309
     *
9310
     * INFO: use UTF8::stripos_in_byte() for the byte-length
9311
     *
9312
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
9313
     *
9314
     * @see http://php.net/manual/en/function.mb-stripos.php
9315
     *
9316
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
9317
     * @param string $needle     <p>The string to find in haystack.</p>
9318
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
9319
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9320
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9321
     *
9322
     * @psalm-pure
9323
     *
9324
     * @return false|int
9325
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
9326
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
9327
     */
9328 25
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Empty operands are answered according to the running PHP version.
        $php8_or_newer = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            return ($php8_or_newer && $needle === '') ? 0 : false;
        }

        if ($needle === '' && !$php8_or_newer) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        $grapheme_usable = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
            && $offset >= 0 // grapheme_stripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;

        if ($grapheme_usable) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9396
9397
    /**
9398
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
9399
     *
9400
     * EXAMPLE: <code>
9401
     * $str = 'iñtërnâtiônàlizætiøn';
9402
     * $search = 'NÂT';
9403
     *
9404
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
9405
     * UTF8::stristr($str, $search, true); // 'iñtër'
9406
     * </code>
9407
     *
9408
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
9409
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
9410
     * @param bool   $before_needle [optional] <p>
9411
     *                              If <b>TRUE</b>, it returns the part of the
9412
     *                              haystack before the first occurrence of the needle (excluding the needle).
9413
     *                              </p>
9414
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
9415
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
9416
     *
9417
     * @psalm-pure
9418
     *
9419
     * @return false|string
9420
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
9421
     */
9422 13
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $php8_or_newer = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            return ($php8_or_newer && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // An empty needle is answered according to the running PHP version.
        if ($needle === '') {
            return $php8_or_newer ? $haystack : false;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stristr($haystack, $needle, $before_needle, $encoding);
            }

            return \mb_stristr($haystack, $needle, $before_needle);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $is_utf8 = ($encoding === 'UTF-8');

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_stristr()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via regex: lazily capture everything in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $prefix = $match[1];
        if ($before_needle) {
            return $prefix;
        }

        // Everything from the first character after the prefix onwards.
        $prefix_length = (int) self::strlen($prefix, $encoding);

        return self::substr($haystack, $prefix_length, null, $encoding);
    }
9502
9503
    /**
9504
     * Get the string length, not the byte-length!
9505
     *
9506
     * INFO: use UTF8::strwidth() for the char-length
9507
     *
9508
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
9509
     *
9510
     * @see http://php.net/manual/en/function.mb-strlen.php
9511
     *
9512
     * @param string $str        <p>The string being checked for length.</p>
9513
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9514
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9515
     *
9516
     * @psalm-pure
9517
     *
9518
     * @return false|int
9519
     *                   <p>
9520
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
9521
     *                   $encoding.
9522
     *                   (One multi-byte character counted as +1).
9523
     *                   <br>
9524
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
9525
     *                   chars.
9526
     *                   </p>
9527
     */
9528 174
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        // 'UTF-8' and 'CP850' are used as-is, everything else is normalized.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        $is_utf8 = ($encoding === 'UTF-8');

        // Neither mbstring nor iconv is available for a non-UTF-8 encoding:
        // warn, then continue with the remaining fallbacks.
        if (
            !$is_utf8
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the characters matched by the regex
        //

        \preg_match_all('/./us', $str, $parts);
        $char_count = \count($parts[0]);

        // No match at all for a non-empty string is reported as a failure.
        return $char_count === 0 ? false : $char_count;
    }
9633
9634
    /**
9635
     * Get string length in byte.
9636
     *
9637
     * @param string $str
9638
     *
9639
     * @psalm-pure
9640
     *
9641
     * @return int
9642
     */
9643 1
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // When the "mbstring_func_overload" support flag is set, the length is
        // taken via mb_strlen() with the 8-bit 'CP850' encoding.
        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
9656
9657
    /**
9658
     * Case-insensitive string comparisons using a "natural order" algorithm.
9659
     *
9660
     * INFO: natural order version of UTF8::strcasecmp()
9661
     *
9662
     * EXAMPLES: <code>
9663
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
9664
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9665
     *
9666
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9667
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9668
     * </code>
9669
     *
9670
     * @param string $str1     <p>The first string.</p>
9671
     * @param string $str2     <p>The second string.</p>
9672
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9673
     *
9674
     * @psalm-pure
9675
     *
9676
     * @return int
9677
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9678
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9679
     *             <strong>0</strong> if they are equal
9680
     */
9681 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands with identical settings, then hand them to the
        // case-sensitive natural-order comparison.
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_first, $folded_second);
    }
9688
9689
    /**
9690
     * String comparisons using a "natural order" algorithm
9691
     *
9692
     * INFO: natural order version of UTF8::strcmp()
9693
     *
9694
     * EXAMPLES: <code>
9695
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
9696
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9697
     *
9698
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9699
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9700
     * </code>
9701
     *
9702
     * @see http://php.net/manual/en/function.strnatcmp.php
9703
     *
9704
     * @param string $str1 <p>The first string.</p>
9705
     * @param string $str2 <p>The second string.</p>
9706
     *
9707
     * @psalm-pure
9708
     *
9709
     * @return int
9710
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9711
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
9712
     *             <strong>0</strong> if they are equal
9713
     */
9714 4
    public static function strnatcmp(string $str1, string $str2): int
    {
        // Byte-identical inputs are equal; no folding needed.
        if ($str1 === $str2) {
            return 0;
        }

        // Fold both sides with self::strtonatfold() before the native
        // natural-order comparison.
        $left = (string) self::strtonatfold($str1);
        $right = (string) self::strtonatfold($str2);

        return \strnatcmp($left, $right);
    }
9725
9726
    /**
9727
     * Case-insensitive string comparison of the first n characters.
9728
     *
9729
     * EXAMPLE: <code>
9730
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
9731
     * </code>
9732
     *
9733
     * @see http://php.net/manual/en/function.strncasecmp.php
9734
     *
9735
     * @param string $str1     <p>The first string.</p>
9736
     * @param string $str2     <p>The second string.</p>
9737
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
9738
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9739
     *
9740
     * @psalm-pure
9741
     *
9742
     * @return int
9743
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9744
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9745
     *             <strong>0</strong> if they are equal
9746
     */
9747 2
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands with identical settings.
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        // Forward $encoding so that self::strncmp() cuts the first $len characters
        // using the same charset that was used for case folding. Without it,
        // self::strncmp() falls back to its 'UTF-8' default.
        return self::strncmp($folded_first, $folded_second, $len, $encoding);
    }
9759
9760
    /**
9761
     * String comparison of the first n characters.
9762
     *
9763
     * EXAMPLE: <code>
9764
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
9765
     * </code>
9766
     *
9767
     * @see http://php.net/manual/en/function.strncmp.php
9768
     *
9769
     * @param string $str1     <p>The first string.</p>
9770
     * @param string $str2     <p>The second string.</p>
9771
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
9772
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9773
     *
9774
     * @psalm-pure
9775
     *
9776
     * @return int
9777
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9778
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9779
     *             <strong>0</strong> if they are equal
9780
     */
9781 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // 'UTF-8' and 'CP850' are used as-is; every other name is normalized first.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Keep only the first $len characters of each operand.
        if ($encoding === 'UTF-8') {
            $head_first = (string) \mb_substr($str1, 0, $len);
            $head_second = (string) \mb_substr($str2, 0, $len);
        } else {
            $head_first = (string) self::substr($str1, 0, $len, $encoding);
            $head_second = (string) self::substr($str2, 0, $len, $encoding);
        }

        return self::strcmp($head_first, $head_second);
    }
9801
9802
    /**
9803
     * Search a string for any of a set of characters.
9804
     *
9805
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
9806
     *
9807
     * @see http://php.net/manual/en/function.strpbrk.php
9808
     *
9809
     * @param string $haystack  <p>The string where char_list is looked for.</p>
9810
     * @param string $char_list <p>This parameter is case-sensitive.</p>
9811
     *
9812
     * @psalm-pure
9813
     *
9814
     * @return false|string
9815
     *                      <p>The string starting from the character found, or false if it is not found.</p>
9816
     */
9817 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        // Nothing to search in, or nothing to search for.
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // Build the pattern from self::rxClass() and look for the first match.
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $m)) {
            return false;
        }

        // Return the rest of the haystack, starting at the matched character.
        $start = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $start);
    }
9829
9830
    /**
9831
     * Find the position of the first occurrence of a substring in a string.
9832
     *
9833
     * INFO: use UTF8::strpos_in_byte() for the byte-length
9834
     *
9835
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
9836
     *
9837
     * @see http://php.net/manual/en/function.mb-strpos.php
9838
     *
9839
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
9840
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
9841
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
9842
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9843
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9844
     *
9845
     * @psalm-pure
9846
     *
9847
     * @return false|int
9848
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
9849
     *                   string.<br> If needle is not found it returns false.
9850
     */
9851 52
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Early exit for an empty haystack, evaluated before $needle is normalized:
        // PHP < 8 always reports "not found", PHP >= 8 finds an empty needle at 0.
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000) {
                if ($needle === '') {
                    return 0;
                }
            } else {
                return false;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // Same check again, now that $needle is guaranteed to be a string.
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Convert a negative offset into the equivalent absolute character
        // position (clamped at 0) BEFORE the haystack is cut. The position found
        // in the cut haystack is added to $offset below, so $offset must refer to
        // the original haystack; resetting it to 0 would report the position
        // relative to the truncated tail instead.
        // NOTE(review): assumes self::substr() treats a negative start like
        // mb_substr() (counting from the end, clamped at the beginning) - confirm.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // Byte position of the needle inside the cut haystack.
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // Convert the byte position into a character count and shift it by
            // the characters skipped at the front.
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        // The needle starts exactly at the (absolute) offset.
        return $offset;
    }
10003
10004
    /**
10005
     * Find the position of the first occurrence of a substring in a string.
10006
     *
10007
     * @param string $haystack <p>
10008
     *                         The string being checked.
10009
     *                         </p>
10010
     * @param string $needle   <p>
10011
     *                         The string to search for in haystack.
10012
     *                         </p>
10013
     * @param int    $offset   [optional] <p>
10014
     *                         The search offset. If it is not specified, 0 is used.
10015
     *                         </p>
10016
     *
10017
     * @psalm-pure
10018
     *
10019
     * @return false|int
10020
     *                   <p>The numeric position of the first occurrence of needle in the
10021
     *                   haystack string. If needle is not found, it returns false.</p>
10022
     */
10023 2
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle is always reported as "not found".
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // Without the overload flag, the native byte-wise function is used.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with the 8-bit charset.
        return \mb_strpos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
10036
10037
    /**
10038
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10039
     *
10040
     * @param string $haystack <p>
10041
     *                         The string being checked.
10042
     *                         </p>
10043
     * @param string $needle   <p>
10044
     *                         The string to search for in haystack (case-insensitive).
10045
     *                         </p>
10046
     * @param int    $offset   [optional] <p>
10047
     *                         The search offset. If it is not specified, 0 is used.
10048
     *                         </p>
10049
     *
10050
     * @psalm-pure
10051
     *
10052
     * @return false|int
10053
     *                   <p>The numeric position of the first occurrence of needle in the
10054
     *                   haystack string. If needle is not found, it returns false.</p>
10055
     */
10056 2
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle is always reported as "not found".
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // Without the overload flag, the native byte-wise function is used.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \stripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with the 8-bit charset.
        return \mb_stripos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
10069
10070
    /**
10071
     * Find the last occurrence of a character in a string within another.
10072
     *
10073
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
10074
     *
10075
     * @see http://php.net/manual/en/function.mb-strrchr.php
10076
     *
10077
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10078
     * @param string $needle        <p>The string to find in haystack</p>
10079
     * @param bool   $before_needle [optional] <p>
10080
     *                              Determines which portion of haystack
10081
     *                              this function returns.
10082
     *                              If set to true, it returns all of haystack
10083
     *                              from the beginning to the last occurrence of needle.
10084
     *                              If set to false, it returns all of haystack
10085
     *                              from the last occurrence of needle to the end,
10086
     *                              </p>
10087
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10088
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10089
     *
10090
     * @psalm-pure
10091
     *
10092
     * @return false|string
10093
     *                      <p>The portion of haystack or false if needle is not found.</p>
10094
     */
10095 2
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only (the native function has no "before needle" mode)
        //

        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) iconv, or 4) vanilla php
        //
        // Both remaining strategies search only for the first character of the
        // needle and differ solely in how the last position is looked up.
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = $first_char;

        $pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        // Everything before the match, or everything from the match onwards.
        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
10200
10201
    /**
10202
     * Reverses characters order in the string.
10203
     *
10204
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
10205
     *
10206
     * @param string $str      <p>The input string.</p>
10207
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10208
     *
10209
     * @psalm-pure
10210
     *
10211
     * @return string
10212
     *                <p>The string with characters in the reverse sequence.</p>
10213
     */
10214 10
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Emoji are encoded before the reversal and decoded again afterwards.
        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // Walk the string from the last character to the first.
            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10258
10259
    /**
10260
     * Find the last occurrence of a character in a string within another, case-insensitive.
10261
     *
10262
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
10263
     *
10264
     * @see http://php.net/manual/en/function.mb-strrichr.php
10265
     *
10266
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10267
     * @param string $needle        <p>The string to find in haystack.</p>
10268
     * @param bool   $before_needle [optional] <p>
10269
     *                              Determines which portion of haystack
10270
     *                              this function returns.
10271
     *                              If set to true, it returns all of haystack
10272
     *                              from the beginning to the last occurrence of needle.
10273
     *                              If set to false, it returns all of haystack
10274
     *                              from the last occurrence of needle to the end,
10275
     *                              </p>
10276
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10277
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10278
     *
10279
     * @psalm-pure
10280
     *
10281
     * @return false|string
10282
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
10283
     */
10284 3
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) vanilla php: search for the first character of the needle only
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = $first_char;

        $pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        // Everything before the match, or everything from the match onwards.
        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
10339
10340
    /**
10341
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
10342
     *
10343
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
10344
     *
10345
     * @param string     $haystack   <p>The string to look in.</p>
10346
     * @param int|string $needle     <p>The string to look for.</p>
10347
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
10348
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10349
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10350
     *
10351
     * @psalm-pure
10352
     *
10353
     * @return false|int
10354
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
10355
     *                   string.<br>If needle is not found, it returns false.</p>
10356
     */
10357 14
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Early exit for an empty haystack, evaluated before $needle is normalized:
        // PHP < 8 always reports "not found", PHP >= 8 finds an empty needle at 0.
        if ($haystack === '') {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // Same check again, now that $needle is guaranteed to be a string.
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // 2) binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl
        //

        $intl_usable = $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            && $offset >= 0 // grapheme_strripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $found = \grapheme_strripos($haystack, $needle, $offset);
            if ($found !== false) {
                return $found;
            }
        }

        //
        // 4) ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // 5) vanilla php: case-fold both sides, then run the case-sensitive search
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
10471
10472
    /**
     * Byte-wise, case-insensitive search for the last occurrence of a string within another.
     *
     * @param string $haystack <p>
     *                         The string to search in.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to search for.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack to start searching,
     *                         passed through unchanged to the underlying function.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found. An empty haystack or
     *                   an empty needle always yields false.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $has_input = $haystack !== '' && $needle !== '';
        if (!$has_input) {
            return false;
        }

        // If the function-overload flag is set, the "mb_" functions are available:
        // use them with the 8-bit "CP850" charset. Otherwise call the native function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
10506
10507
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, native byte functions (for "CP850" / "ASCII"), intl ("grapheme_strrpos()",
     * UTF-8 and non-negative offset only), native byte functions for pure ASCII input and
     * finally a vanilla PHP implementation.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $php8_or_newer = \PHP_VERSION_ID >= 80000;
        $empty_haystack = $haystack === '';

        // Empty haystack: before PHP 8 this is always "not found";
        // since PHP 8 an empty needle is found at position 0.
        if ($empty_haystack) {
            if (!$php8_or_newer) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if (\is_int($needle) && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($empty_haystack) {
            // only reachable on PHP >= 8, re-check the needle after the string conversion
            return $needle === '' ? 0 : false;
        }

        if (!$php8_or_newer && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $intl_position = \grapheme_strrpos($haystack, $needle, $offset);
            if ($intl_position !== false) {
                return $intl_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Apply the offset by cutting the haystack: a positive offset drops the leading
        // characters (and is added back to the result), a negative offset drops the
        // trailing characters. A failed self::substr() (false) becomes an empty string.
        if ($offset > 0) {
            $haystack = (string) self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            $haystack = (string) self::substr($haystack, 0, $offset);
            $offset = 0;
        }

        $byte_position = \strrpos($haystack, $needle);
        if ($byte_position === false) {
            return false;
        }

        // Convert the byte position into a character position by measuring
        // the length of everything in front of the match.
        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_position);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
10666
10667
    /**
     * Byte-wise search for the position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false. An empty
     *                   haystack or an empty needle always yields false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $has_input = $haystack !== '' && $needle !== '';
        if (!$has_input) {
            return false;
        }

        // If the function-overload flag is set, the "mb_" functions are available:
        // use them with the 8-bit "CP850" charset. Otherwise call the native function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
10701
10702
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start of the part of the string that is examined.</p>
     * @param int|null $length   [optional] <p>Length of the part of the string that is examined.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The length of the matching initial segment, 0 if nothing matches or
     *                   if the (sliced) string or the mask is empty.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Reduce the input to the requested window before matching.
        $needs_slice = $offset !== 0 || $length !== null;
        if ($needs_slice) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // Match the longest leading run made up of mask characters only.
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        $found = [];
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
10749
10750
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true); // 'iñtër'
     * </code>
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, native byte functions (for "CP850" / "ASCII"), intl ("grapheme_strstr()",
     * UTF-8 only), native byte functions for pure ASCII input and finally a regex based
     * vanilla PHP implementation.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $php8_or_newer = \PHP_VERSION_ID >= 80000;

        // Empty haystack: only PHP >= 8 "finds" an empty needle (as an empty string).
        if ($haystack === '') {
            return ($php8_or_newer && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Empty needle: PHP >= 8 returns the whole haystack, older versions return false.
        if ($needle === '') {
            return $php8_or_newer ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $intl_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($intl_result !== false) {
                return $intl_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // Lazily capture everything in front of the first occurrence of the needle.
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        $before = $match[1] ?? null;
        if ($before === null) {
            return false;
        }

        if ($before_needle) {
            return $before;
        }

        // Skip the captured prefix, the remainder starts with the needle.
        return self::substr($haystack, (int) self::strlen($before));
    }
10883
10884
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found. An empty haystack or
     *                      an empty needle always yields false.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        $has_input = $haystack !== '' && $needle !== '';
        if (!$has_input) {
            return false;
        }

        // If the function-overload flag is set, the "mb_" functions are available:
        // use them with the 8-bit "CP850" charset. Otherwise call the native function.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
10925
10926
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // replace the special case-folding characters first
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // fast path: plain UTF-8 without a language hint goes straight to mbstring
        if ($lang === null && $encoding === 'UTF-8') {
            return $lower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        // otherwise let the encoding / language aware helpers do the conversion
        return $lower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
10982
10983
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without a language hint
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // language specific conversion via the intl transliterator
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11062
11063
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without a language hint
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, the language itself is unknown to the transliterator
                    /**
                     * @psalm-suppress ImpureFunctionCall - this is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11142
11143
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * If "to" is not an empty string, "from" and "to" are each turned into a list
     * (strings are split via UTF8::str_split()), the longer list is cut down to the
     * length of the shorter one and both are combined into a "from => to" map.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into a replacement map
     *
     * @psalm-pure
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // cut the longer list down, so that every "from" has exactly one "to"
            $count_from = \count($from);
            $count_to = \count($to);

            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep the original "from" untouched until the combine succeeded,
            // otherwise the error message below could only report "false"
            $pairs = \array_combine($from, $to);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $pairs;
        }

        // only reachable with a string "from" if "to" is an empty string
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        return \strtr($str, $from);
    }
11209
11210
    /**
     * Return the display width of a string.
     *
     * With mbstring available the result comes from mb_strwidth(). Without it, the
     * string is converted to UTF-8 (if needed), every character that matches the
     * wide-character ranges in the pattern below counts as two columns and every
     * remaining character counts as one.
     *
     * INFO: this is the width, not the length; see UTF8::strlen() for the length
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>0 for an empty string, otherwise the computed width.</p>
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are used as-is, everything else is normalized first
        $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and let preg_replace() report how many were removed
        $wide_count = 0;
        $narrow_part = (string) \preg_replace(
            '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u',
            '',
            $str,
            -1,
            $wide_count
        );

        // two columns per wide character + one column per remaining character
        return ($wide_count * 2) + (int) self::strlen($narrow_part);
    }
11269
11270
    /**
     * Get part of a string.
     *
     * The work is delegated to the first strategy that applies:
     * mb_substr() (any encoding, if mbstring is available), byte-wise substr()
     * for "CP850" / "ASCII", grapheme_substr() (UTF-8 + non-negative offset + intl),
     * iconv_substr() (non-negative length + iconv), byte-wise substr() for pure
     * ASCII input and finally a split/slice/join on the characters of the string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty
     *                      input, for a <i>length</i> of 0 and (in the fallbacks) for an
     *                      <i>offset</i> behind the end of the string. <b>FALSE</b> is returned
     *                      if the string length cannot be determined, or if the underlying
     *                      native function returns it.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring (UTF-8)
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        //
        // fallback via mbstring (every other encoding)
        //
        // Without this branch the requested encoding was never handed to mbstring,
        // and the remaining fallbacks would treat the input as UTF-8.
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // INFO: a "null" length means "until the end of the string"
            return \mb_substr($str, $offset, $length, $encoding);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string
        //
        // INFO: "$length" is either null or a non-zero integer at this point
        //       (0 was handled at the very top), so the strict null-check is exact.
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        $length = $length ?? $str_length;

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // pass the encoding explicitly, otherwise iconv uses its configured default charset
            $return_tmp = \iconv_substr($str, $offset, $length, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        // &&
        // extract relevant part, and join to make sting again
        return \implode('', \array_slice(self::str_split($str), $offset, $length));
    }
11428
11429
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * If an offset or a length is given, <i>str1</i> is first reduced to the requested
     * part and <i>str2</i> is cut to the same number of characters; afterwards both
     * strings are compared via UTF8::strcmp() or UTF8::strcasecmp().
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. If omitted, the rest of
     *                                     <i>str1</i> after the offset is used.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // "$str1" is encoded in "$encoding", so its length must be measured in that
                // encoding as well, otherwise "$str2" is cut at the wrong position
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
11490
11491
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences; 0 for an empty haystack or a length of 0;
     *                   false if the needle is empty or if the length of the haystack cannot be
     *                   determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // nothing can be found in an empty haystack or in a zero-length window
        // (the result is the same for every PHP version, so no version check is needed)
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window first
        if ($offset || $length > 0) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
11588
11589
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * If "mbstring.func_overload" is active, the haystack is cut byte-wise first and the
     * occurrences are counted via mb_substr_count() with the 8-bit "CP850" charset;
     * otherwise the native substr_count() is used directly.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string; 0 if the haystack or the needle is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                // output from "substr_count()" have changed in PHP 7.1
                //
                // INFO: PHP_VERSION_ID is "major * 10000 + minor * 100 + release",
                //       so PHP 7.1.0 is 70100 (71000 would be PHP 7.10.0)
                \PHP_VERSION_ID < 70100
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
11670
11671
    /**
     * Count how often $substring occurs in the given string.
     *
     * The comparison is case-sensitive by default; pass false as $case_sensitive to
     * compare both values in a case-normalized form instead.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences; 0 if the string or the substring is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // every encoding that was not passed as exactly "UTF-8" is normalized and
        // handed to the "mb_" function explicitly
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        // plain UTF-8: upper-case both sides for the case-insensitive comparison
        if (!$case_sensitive) {
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
11718
11719
    /**
     * Strip the prefix $needle from the start of $haystack, ignoring the case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack stays empty and an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many leading characters as the needle is long
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
11751
11752
    /**
     * Get part of a string process in bytes.
     *
     * If "mbstring.func_overload" is active, mb_substr() with the 8-bit "CP850" charset
     * is used, otherwise the native substr().
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters; an empty string for an empty input or a
     *                      <i>length</i> of 0. Whatever the underlying native function returns
     *                      for an out-of-range <i>offset</i> is passed through.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no length given: read until the end of the string instead of
        // capping the result at a hard-coded number of bytes
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
11785
11786
    /**
     * Strip the suffix $needle from the end of $haystack, ignoring the case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it
     *                does not end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack stays empty and an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything except the last "length of needle" characters
        $haystack_length = (int) self::strlen($haystack);
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystack_length - $needle_length);
    }
11818
11819
    /**
     * Strip the prefix $needle from the start of $haystack (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack stays empty and an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many leading characters as the needle is long
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
11851
11852
    /**
     * Replace text within a portion of a string.
     *
     * If <i>str</i> is an array, the replacement is applied to every element and an
     * array is returned; <i>replacement</i>, <i>offset</i> and <i>length</i> may then be
     * arrays as well (one value per element) or single values that are used for every
     * element. NOTE: for an array input an omitted <i>length</i> means 0 (insert), for a
     * single string it means "until the end of the string".
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of stings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of stings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If length is zero then this function will have the
     *                                     effect of inserting replacement into string at the given start
     *                                     offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if "$offset" or "$length" is an array while "$str" is not
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     *
     * @template TSubstrReplace
     * @phpstan-param TSubstrReplace $str
     * @phpstan-return TSubstrReplace
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // nothing to replace: "array_pad()" never shrinks an array, so without this
            // guard the helper arrays below would still produce one (bogus) result
            if ($num === 0) {
                return [];
            }

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer offsets fall back to 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // non-integer lengths fall back to the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call (one per element), the encoding must be forwarded explicitly
            return \array_map(
                static function ($str_item, $replacement_item, $offset_item, $length_item) use ($encoding) {
                    return self::substr_replace(
                        $str_item,
                        $replacement_item,
                        $offset_item,
                        $length_item,
                        $encoding
                    );
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single input string can only use the first replacement
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, a missing / too large one means "the rest"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into their characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12010
12011
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack).
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove (compared byte-wise, case-sensitive).</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with
     *                the needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to cut from, or nothing to cut off
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // binary-safe suffix comparison
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            // fast path: native mbstring functions with their default encoding
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        $keep_length = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep_length, $encoding);
    }
12060
12061
    /**
     * Returns a case swapped version of the string.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);

            if ($chars === false) {
                // The input is not valid UTF-8 and cannot be split into characters,
                // so keep the legacy byte-wise behavior for such input.
                return (string) (\mb_strtolower($str) ^ \mb_strtoupper($str) ^ $str);
            }

            // Swap character by character: a byte-wise XOR of the lower-/upper-cased
            // strings gets misaligned as soon as one case mapping changes the byte
            // length of a character (e.g. "ı" -> "I" or "ſ" -> "S").
            $swapped = '';
            foreach ($chars as $char) {
                $upper = \mb_strtoupper($char, 'UTF-8');
                $swapped .= $upper !== $char ? $upper : \mb_strtolower($char, 'UTF-8');
            }

            return $swapped;
        }

        // Other encodings still use the byte-wise XOR of the converted strings, which is
        // only reliable while every case mapping keeps the byte length of the character.
        return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
    }
12093
12094
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when a function of an extension exists
     * although the extension itself is not loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12121
12122
    /**
     * Replaces every tab character in the string with a run of spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces used for one tab.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with all tabs replaced.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12142
12143
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // drop invalid byte sequences before any case conversion takes place
            $str = self::clean($str);
        }

        // language specific rules or length preservation are delegated to "str_titleize()"
        $needs_special_handling = $lang !== null || $try_to_keep_the_string_length;
        if ($needs_special_handling) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
12197
12198
    /**
     * Convert a string into ASCII.
     *
     * This is a thin wrapper that delegates to "ASCII::to_transliterate()".
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
12219
12220
    /**
     * Interprets a scalar value as a boolean.
     *
     * The value is cast to a string first. Known keywords ("true", "1", "on", "yes" /
     * "false", "0", "off", "no") are matched exactly and then case-insensitively,
     * other numeric strings are true when greater than zero and anything else is
     * true unless it is empty (or "0") after trimming.
     *
     * @param bool|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $str = (string) $str;

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keywords = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // exact match first, then the lowercased variant
        foreach ([$str, \strtolower($str)] as $candidate) {
            if (isset($keywords[$candidate])) {
                return $keywords[$candidate];
            }
        }

        return \is_numeric($str) ? ((float) $str) > 0 : (bool) \trim($str);
    }
12264
    /**
12265
     * Convert given string to safe filename (and keep string case).
12266
     *
12267
     * @param string $str
12268
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
12269
     *                                  simply replaced with hyphen.
12270
     * @param string $fallback_char
12271
     *
12272
     * @psalm-pure
12273
     *
12274
     * @return string
12275
     */
12276 1
    public static function to_filename(
12277
        string $str,
12278
        bool $use_transliterate = false,
12279
        string $fallback_char = '-'
12280
    ): string {
12281 1
        return ASCII::to_filename(
12282 1
            $str,
12283
            $use_transliterate,
12284
            $fallback_char
12285
        );
12286
    }
12287
12288
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), keeping their keys.
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @template TToIso8859
     * @phpstan-param TToIso8859 $str
     * @phpstan-return TToIso8859
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            $converted = [];
            foreach ($str as $key => $value) {
                $converted[$key] = self::to_iso8859($value);
            }

            return $converted;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
12320
12321
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * <li>Arrays are converted element by element, nested arrays are converted recursively and keys are kept.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @phpstan-param TToUtf8 $str
     * @phpstan-return TToUtf8
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (\is_array($str)) {
            // iterate by key instead of by reference, so that no dangling reference is left behind
            foreach ($str as $key => $value) {
                $str[$key] = \is_array($value)
                    ? self::to_utf8($value, $decode_html_entity_to_utf8)
                    : self::to_utf8_string($value, $decode_html_entity_to_utf8);
            }

            /** @phpstan-var TToUtf8 $str */
            return $str;
        }

        /** @phpstan-var TToUtf8 $str */
        $str = self::to_utf8_string($str, $decode_html_entity_to_utf8);

        return $str;
    }
12361
12362
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string</p>
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $result = '';
        $pos = 0;

        while ($pos < $byte_count) {
            $lead = $str[$pos];
            $lead_code = \ord($lead);

            // 7-bit bytes never need a conversion
            if ($lead_code < 0x80) {
                $result .= $lead;
                ++$pos;

                continue;
            }

            // number of continuation bytes a well-formed sequence needs after this lead byte,
            // 0 === the byte can not start a sequence at all
            if ($lead_code < 0xC0) {
                $expected_tail = 0; // stray continuation byte (0x80 - 0xBF)
            } elseif ($lead_code <= 0xDF) {
                $expected_tail = 1; // looks like 2 bytes UTF8
            } elseif ($lead_code <= 0xEF) {
                $expected_tail = 2; // looks like 3 bytes UTF8
            } elseif ($lead_code <= 0xF7) {
                $expected_tail = 3; // looks like 4 bytes UTF8
            } else {
                $expected_tail = 0; // 0xF8 - 0xFF: doesn't look like UTF8
            }

            // every expected continuation byte must exist and must be within 0x80 - 0xBF
            $looks_like_utf8 = $expected_tail > 0;
            for ($k = 1; $looks_like_utf8 && $k <= $expected_tail; ++$k) {
                $tail_code = $pos + $k < $byte_count ? \ord($str[$pos + $k]) : 0x00;
                $looks_like_utf8 = $tail_code >= 0x80 && $tail_code <= 0xBF;
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already: copy the whole sequence
                $result .= \substr($str, $pos, $expected_tail + 1);
                $pos += $expected_tail + 1;
            } else {
                // not valid UTF8: convert this single byte only
                $result .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high_surrogate = (int) \hexdec($matches[1]);
                    $low_surrogate = (int) \hexdec($matches[2]);
                    $code_point = 0x10000 + (($high_surrogate - 0xD800) << 10) + ($low_surrogate - 0xDC00);
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    // NOTE(review): the two values passed to "self::chr()" are the raw bytes of a
                    // 2 byte UTF-8 sequence - confirm that "self::chr()" returns single bytes here.
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6)) . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" returns null on error
        if ($result === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
12493
12494
    /**
     * Returns the given string as an integer, or null if the string isn't numeric.
     *
     * "Numeric" means whatever "is_numeric()" accepts, the value is then cast via "(int)".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>null if the string isn't numeric</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
12512
12513
    /**
     * Returns the given input as string, or null if the input isn't int|float|string
     * and do not implement the "__toString()" method.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        // plain scalars that have a well defined string representation
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects are only accepted if they are able to stringify themselves
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // null, bool, array, resource and non-stringable objects
        return null;
    }
12552
12553
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped, null === whitespace,
     *                           an empty string strips nothing</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list means there is nothing to strip (same as the native "trim()"),
        // it would otherwise end up as a malformed, empty character class in the pattern.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            // the pattern is handed over to "regex_replace()", so "/" is escaped as well
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
12598
12599
    /**
     * Makes string's first char uppercase.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with the first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before the string is split into characters
            $str = self::clean($str);
        }

        // without language rules / length preservation the native mbstring functions are sufficient
        $native_uppercase = $lang === null && !$try_to_keep_the_string_length;

        // decided before the encoding gets normalized
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $remainder = (string) self::substr($str, 1, null, $encoding);
            $first_char = $native_uppercase
                ? (string) \mb_substr($str, 0, 1, $encoding)
                : (string) self::substr($str, 0, 1, $encoding);
        }

        if (!$native_uppercase) {
            $first_char = self::strtoupper(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        } elseif ($is_utf8) {
            $first_char = \mb_strtoupper($first_char);
        } else {
            $first_char = \mb_strtoupper($first_char, $encoding);
        }

        return $first_char . $remainder;
    }
12675
12676
    /**
     * Uppercase for all words in the string.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: a loose check would also treat the string "0" as empty
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            // drop invalid byte sequences before the string is split into words
            $str = self::clean($str);
        }

        // the native "ucwords()" is only usable without extra word chars and without exceptions
        $use_php_default_functions = $char_list === '' && \implode('', $exceptions) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty parts only, a part like "0" must be kept
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                // excluded words are copied as they are
                $words_str .= $word;
            }
        }

        return $words_str;
    }
12744
12745
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass is always done, with "$multi_decode" the passes
        // are repeated until the string does not change anymore.
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $previous !== $str);

        return self::fix_simple_utf8($str);
    }
12804
12805
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Two byte sequences are mapped to a single byte if their value is below 256,
     * everything else that is encoded with more than one byte is replaced by "?".
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>Return the untouched input, if the decoded string is not shorter
     *                                (measured via "UTF8::strlen()") than the input.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $original = $str;
        $byte_count = \strlen($str);

        // lazy load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $placeholder = '?';

        // The string is rewritten in place: "$write" never overtakes "$read",
        // so no byte is overwritten before it was read.
        $read = 0;
        $write = 0;
        while ($read < $byte_count) {
            $lead_nibble = $str[$read] & "\xF0";

            if ($lead_nibble === "\xC0" || $lead_nibble === "\xD0") {
                // 2 byte sequence: 5 payload bits from the lead byte + 6 bits from the next byte
                $high_bits = self::$ORD[$str[$read] & "\x1F"] << 6;
                ++$read;
                $code = $high_bits | self::$ORD[$str[$read] & "\x3F"];
                $str[$write] = $code < 256 ? self::$CHR[$code] : $placeholder;
            } elseif ($lead_nibble === "\xE0") {
                // 3 byte sequence: not representable, skip the two following bytes
                $str[$write] = $placeholder;
                $read += 2;
            } elseif ($lead_nibble === "\xF0") {
                // 4 byte sequence: not representable, skip the three following bytes
                $str[$write] = $placeholder;
                $read += 3;
            } else {
                // single byte: copy as it is
                $str[$write] = $str[$read];
            }

            ++$read;
            ++$write;
        }

        // cut off the leftover bytes behind the write position
        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $write);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($original)
        ) {
            return $original;
        }

        return $return;
    }
12877
12878
    /**
12879
     * Encodes an ISO-8859-1 string to UTF-8.
12880
     *
12881
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
12882
     *
12883
     * @param string $str <p>The input string.</p>
12884
     *
12885
     * @psalm-pure
12886
     *
12887
     * @return string
12888
     */
12889 16
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // \utf8_encode() is deprecated as of PHP 8.2. Converting from ISO-8859-1 via
        // mbstring produces the same result: every input byte becomes the code point
        // of the same number.
        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

        if ($encoded === false) {
            return '';
        }

        return $encoded;
    }
12904
12905
    /**
12906
     * Returns an array with all utf8 whitespace characters.
12907
     *
12908
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
12909
     *
12910
     * @psalm-pure
12911
     *
12912
     * @return string[]
12913
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
12914
     *                  as defined in above URL
12915
     */
12916 2
    public static function whitespace_table(): array
    {
        // plain accessor for the static whitespace lookup table
        $whitespace_table = self::$WHITESPACE_TABLE;

        return $whitespace_table;
    }
12920
12921
    /**
12922
     * Limit the number of words in a string.
12923
     *
12924
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
12925
     *
12926
     * @param string $str        <p>The input string.</p>
12927
     * @param int    $limit      <p>The limit of words as integer.</p>
12928
     * @param string $str_add_on <p>Replacement for the stripped string.</p>
12929
     *
12930
     * @psalm-pure
12931
     *
12932
     * @return string
12933
     */
12934 2
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        // no input or a non-positive limit yields an empty result
        if ($limit < 1 || $str === '') {
            return '';
        }

        // optional leading whitespace followed by up to $limit "word + trailing whitespace" groups
        $first_words_regex = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($first_words_regex, $str, $matches);

        // nothing matched: hand the input back unchanged
        if (!isset($matches[0])) {
            return $str;
        }

        $leading_words = $matches[0];

        // the match already spans the whole string, so nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($leading_words)) {
            return $str;
        }

        return \rtrim($leading_words) . $str_add_on;
    }
12955
12956
    /**
12957
     * Wraps a string to a given number of characters
12958
     *
12959
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
12960
     *
12961
     * @see http://php.net/manual/en/function.wordwrap.php
12962
     *
12963
     * @param string $str   <p>The input string.</p>
12964
     * @param int    $width [optional] <p>The column width.</p>
12965
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
12966
     * @param bool   $cut   [optional] <p>
12967
     *                      If the cut is set to true, the string is
12968
     *                      always wrapped at or before the specified width. So if you have
12969
     *                      a word that is larger than the given width, it is broken apart.
12970
     *                      </p>
12971
     *
12972
     * @psalm-pure
12973
     *
12974
     * @return string
12975
     *                <p>The given string wrapped at the specified column.</p>
12976
     */
12977 12
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        // Build two parallel structures:
        //  - $chars: the characters as returned by self::str_split(), with each $break as ONE element
        //  - $mask:  one single-byte stand-in per element ('#' = break, ' ' = space, '?' = anything else)
        // The native wordwrap() is then applied to the mask only.
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach (\explode($break, $str) as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $result = '';
        $char_index = 0;  // position inside $chars
        $mask_index = -1; // position inside $mask
        $break_pos = -1;  // position of the current '#' marker inside $mask

        /** @noinspection PhpAssignmentInConditionInspection - is ok here */
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy every character that sits in front of the current marker
            ++$mask_index;
            while ($mask_index < $break_pos) {
                if (isset($chars[$char_index])) {
                    $result .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }

                ++$mask_index;
            }

            // If the element at the marker position is an original break or a space, it is
            // dropped; any other element stays in $chars and is emitted later.
            if (
                $break === $chars[$char_index]
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index]);
                ++$char_index;
            }

            $result .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed above follows the last break
        return $result . \implode('', $chars);
    }
13048
13049
    /**
13050
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
13051
     *    ... so that each line is wrapped separately.
13052
     *
13053
     * @param string      $str             <p>The input string.</p>
13054
     * @param int         $width           [optional] <p>The column width.</p>
13055
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
13056
     * @param bool        $cut             [optional] <p>
13057
     *                                     If the cut is set to true, the string is
13058
     *                                     always wrapped at or before the specified width. So if you have
13059
     *                                     a word that is larger than the given width, it is broken apart.
13060
     *                                     </p>
13061
     * @param bool        $add_final_break [optional] <p>
13062
     *                                     If this flag is true, then the method will add a $break at the end
13063
     *                                     of the result string.
13064
     *                                     </p>
13065
     * @param string|null $delimiter       [optional] <p>
13066
     *                                     You can change the default behavior, where we split the string by newline.
13067
     *                                     </p>
13068
     *
13069
     * @psalm-pure
13070
     *
13071
     * @return string
13072
     */
13073 1
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // split on any newline flavour by default, otherwise on the given delimiter
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        // wrap every line on its own; a failed split leaves the list empty
        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        // lines are re-joined with the delimiter ("\n" when none was given)
        $glue = $delimiter ?? "\n";

        return \implode($glue, $wrapped_lines) . ($add_final_break ? $break : '');
    }
13102
13103
    /**
13104
     * Returns an array of Unicode White Space characters.
13105
     *
13106
     * @psalm-pure
13107
     *
13108
     * @return string[]
13109
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
13110
     */
13111 2
    public static function ws(): array
    {
        // plain accessor for the static white space list
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
13115
13116
    /**
13117
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
13118
     *
13119
     * EXAMPLE: <code>
13120
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
13121
     * //
13122
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
13123
     * </code>
13124
     *
13125
     * @see          http://hsivonen.iki.fi/php-utf8/
13126
     *
13127
     * @param string $str    <p>The string to be checked.</p>
13128
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
13129
     *
13130
     * @psalm-pure
13131
     *
13132
     * @return bool
13133
     *
13134
     * @noinspection ReturnTypeCanBeDeclaredInspection
13135
     */
13136 110
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary input that is_utf16() / is_utf32() recognise is rejected
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        // Fallback without PCRE UTF-8 support: validate octet by octet.

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // Recognised first octets: [bit mask, expected bits, payload mask, octets in the sequence].
        // 5- and 6-octet forms are accepted here on purpose and rejected once the sequence
        // is complete, instead of trying to resynchronize in the middle of it.
        $lead_octets = [
            [0xE0, 0xC0, 0x1F, 2],
            [0xF0, 0xE0, 0x0F, 3],
            [0xF8, 0xF0, 0x07, 4],
            [0xFC, 0xF8, 0x03, 5],
            [0xFE, 0xFC, 0x01, 6],
        ];

        // smallest code point that may legally be encoded with the given number of octets
        $shortest_form_minimum = [
            2 => 0x0080,
            3 => 0x0800,
            4 => 0x10000,
        ];

        $pending = 0;         // continuation octets still expected for the current sequence
        $code_point = 0;      // code point assembled so far
        $sequence_length = 1; // total number of octets in the current sequence

        $length = \strlen($str);
        for ($pos = 0; $pos < $length; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($pending === 0) {
                // Start of a character: either US-ASCII or the first octet of a multi-octet sequence.
                if ((0x80 & $octet) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;

                    continue;
                }

                $is_lead_octet = false;
                foreach ($lead_octets as list($mask, $expected, $payload_mask, $octet_count)) {
                    if (($mask & $octet) === $expected) {
                        $pending = $octet_count - 1;
                        // the payload bits of the first octet are the most significant ones
                        $code_point = ($octet & $payload_mask) << (6 * $pending);
                        $sequence_length = $octet_count;
                        $is_lead_octet = true;

                        break;
                    }
                }

                if (!$is_lead_octet) {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            // Inside a multi-octet sequence only a continuation octet (10xxxxxx) is legal.
            if ((0xC0 & $octet) !== 0x80) {
                // Incomplete multi-octet sequence.
                return false;
            }

            $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);

            if (--$pending !== 0) {
                continue;
            }

            // End of the multi-octet sequence: $code_point now holds the final code point.
            if (
                // sequences longer than 4 octets are never valid
                $sequence_length > 4
                ||
                // From Unicode 3.1, non-shortest form is illegal
                $code_point < $shortest_form_minimum[$sequence_length]
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                ($code_point & 0xFFFFF800) === 0xD800
                ||
                // Code points outside the Unicode range are illegal.
                $code_point > 0x10FFFF
            ) {
                return false;
            }

            // reset for the next character
            $code_point = 0;
            $sequence_length = 1;
        }

        // a sequence that is still open at the end of the input is invalid
        return $pending === 0;
    }
13270
13271
    /**
13272
     * @param string $str
13273
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
13274
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
13275
     *
13276
     * @psalm-pure
13277
     *
13278
     * @return string
13279
     *
13280
     * @noinspection ReturnTypeCanBeDeclaredInspection
13281
     */
13282 33
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;

        // common case folding: swap one list of the table for the other
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];
        $str = $use_lowercase
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        // the full case folding table is loaded on first use only
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // lowercase replaces list 0 by list 1, uppercase the other way round
        return $use_lowercase
            ? \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str)
            : \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
13324
13325
    /**
13326
     * get data from "/data/*.php"
13327
     *
13328
     * @param string $file
13329
     *
13330
     * @psalm-pure
13331
     *
13332
     * @return array
13333
     *
13334
     * @noinspection ReturnTypeCanBeDeclaredInspection
13335
     */
13336 7
    private static function getData(string $file)
    {
        // data files live in the "data" directory next to this class
        $data_file = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $data_file;
    }
13343
13344
    /**
13345
     * @psalm-pure
13346
     *
13347
     * @return true|null
13348
     *
13349
     * @noinspection ReturnTypeCanBeDeclaredInspection
13350
     */
13351 1
    private static function initEmojiData()
    {
        // the caches are already filled - nothing to do
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $first, string $second): int {
                // longer keys are sorted to the front
                return \strlen($second) <=> \strlen($first);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // one placeholder per key, built from the crc32 checksum of the key and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
13381
13382
    /**
13383
     * Checks whether mbstring "overloaded" is active on the server.
13384
     *
13385
     * @psalm-pure
13386
     *
13387
     * @return bool
13388
     */
13389
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant there is nothing to test the INI value against
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection DeprecatedIniOptionsInspection */
        $overload_flags = (int) @\ini_get('mbstring.func_overload');

        // true if the "string functions" bit is set
        return (bool) ($overload_flags & \MB_OVERLOAD_STRING);
    }
13402
13403
    /**
13404
     * @param array    $strings
13405
     * @param bool     $remove_empty_values
13406
     * @param int|null $remove_short_values
13407
     *
13408
     * @psalm-pure
13409
     *
13410
     * @return array
13411
     *
13412
     * @noinspection ReturnTypeCanBeDeclaredInspection
13413
     */
13414 2
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // Iterate by value: the elements are only read, so a reference would merely
        // force a copy of the array and leave a dangling reference behind.
        foreach ($strings as $str) {
            // drop strings with at most $remove_short_values characters (if a limit is given)
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            // drop strings that are empty after trimming (if requested)
            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            // the result is re-indexed from 0
            $return[] = $str;
        }

        return $return;
    }
13444
13445
    /**
13446
     * rxClass
13447
     *
13448
     * @param string $s
13449
     * @param string $class
13450
     *
13451
     * @return string
13452
     *                    *
13453
     * @psalm-pure
13454
     */
13455 36
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // Element 0 collects the content of the character class; further elements
        // become separate alternatives.
        $class_array = [$class];

        // Iterate by value with a dedicated variable: the split result is a temporary,
        // and the parameter $s must not be overwritten while looping.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen is moved to the front of the class
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: quote for use inside a "/"-delimited pattern
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // a single character of three or more bytes is appended as-is
                $class_array[0] .= $char;
            } else {
                // anything longer than one character becomes its own alternative
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
13500
13501
    /**
13502
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
13503
     *
13504
     * @param string $names
13505
     * @param string $delimiter
13506
     * @param string $encoding
13507
     *
13508
     * @psalm-pure
13509
     *
13510
     * @return string
13511
     *
13512
     * @noinspection ReturnTypeCanBeDeclaredInspection
13513
     */
13514 1
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // init
        $name_helper_array = \explode($delimiter, $names);
        if ($name_helper_array === false) {
            return '';
        }

        // 'names' are left untouched when a part equals them exactly;
        // 'prefixes' (and, for the "-" delimiter, 'names' too) when a part merely starts with them.
        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Results are written back by index, so no reference is kept alive after the loop.
        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $keep_as_is = false;

            if ($delimiter === '-') {
                foreach ($special_cases['names'] as $beginning) {
                    if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                        $keep_as_is = true;

                        break;
                    }
                }
            }

            if (!$keep_as_is) {
                foreach ($special_cases['prefixes'] as $beginning) {
                    if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                        $keep_as_is = true;

                        break;
                    }
                }
            }

            if ($keep_as_is) {
                continue;
            }

            $name_helper_array[$index] = self::ucfirst($name, $encoding);
        }

        return \implode($delimiter, $name_helper_array);
    }
13601
13602
    /**
13603
     * Generic case-sensitive transformation for collation matching.
13604
     *
13605
     * @param string $str <p>The input string</p>
13606
     *
13607
     * @psalm-pure
13608
     *
13609
     * @return string|null
13610
     *
13611
     * @noinspection ReturnTypeCanBeDeclaredInspection
13612
     */
13613 6
    private static function strtonatfold(string $str)
    {
        // decompose first, so that combining marks become separate code points
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalization failed: fall back to an empty string
        if ($decomposed === false) {
            return '';
        }

        // strip every run of non-spacing marks (\p{Mn})
        $without_marks = \preg_replace('/\p{Mn}+/u', '', $decomposed);

        return $without_marks;
    }
13626
13627
    /**
13628
     * @param int|string $input
13629
     *
13630
     * @psalm-pure
13631
     *
13632
     * @return string
13633
     *
13634
     * @noinspection ReturnTypeCanBeDeclaredInspection
13635
     */
13636 30
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        // lazily load the lookup tables used below
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Build a 2-byte sequence: the bits above the low six go into the first byte,
            // the low six bits into the second one.
            // ">> 6" is the integer form of "/ 64": a fractional float must not be used as an
            // array key (implicit float-to-int conversion is deprecated since PHP 8.1).
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
            // byte-wise string operators: keep the low six bits, then set the continuation marker
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
13665
13666
    /**
13667
     * @param string $str
13668
     *
13669
     * @psalm-pure
13670
     *
13671
     * @return string
13672
     *
13673
     * @noinspection ReturnTypeCanBeDeclaredInspection
13674
     */
13675 9
    private static function urldecode_unicode_helper(string $str)
    {
        // cheap pre-check: without "%u" there is nothing to convert
        if (\strpos($str, '%u') === false) {
            return $str;
        }

        // "%u" followed by three or four hex digits
        $unicode_escape_regex = '/%u([0-9a-fA-F]{3,4})/';
        if (!\preg_match($unicode_escape_regex, $str)) {
            return $str;
        }

        // rewrite each escape as a hexadecimal HTML entity, e.g. "%u00e9" => "&#x00e9;"
        return (string) \preg_replace($unicode_escape_regex, '&#x\\1;', $str);
    }
13688
}
13689