Passed
Push — master ( d0d0f0...91c9d5 )
by Lars
18:21 queued 15:46
created

UTF8   F

Complexity

Total Complexity 1742

Size/Duplication

Total Lines 13653
Duplicated Lines 0 %

Test Coverage

Coverage 79.81%

Importance

Changes 110
Bugs 53 Features 6
Metric Value
eloc 4219
c 110
b 53
f 6
dl 0
loc 13653
ccs 2957
cts 3705
cp 0.7981
rs 0.8
wmc 1742

272 Methods

Rating   Name   Duplication   Size   Complexity  
A filter_input() 0 16 3
A encode_mimeheader() 0 26 5
A filter_var_array() 0 15 2
F extract_text() 0 175 34
A filter_var() 0 15 2
A filter_input_array() 0 15 3
A first_char() 0 14 4
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
B chr_to_decimal() 0 38 8
A file_has_bom() 0 8 2
A add_bom_to_string() 0 7 2
A is_bom() 0 10 3
A is_hexadecimal() 0 7 2
A ctype_loaded() 0 3 1
A has_uppercase() 0 7 2
A remove_left() 0 28 4
A emoji_decode() 0 21 3
A is_utf8() 0 13 4
D chr() 0 107 19
B get_file_type() 0 60 7
C is_utf16() 0 71 16
C filter() 0 59 14
A is_html() 0 14 2
A decode_mimeheader() 0 8 3
A chunk_split() 0 3 1
A emoji_encode() 0 21 3
A is_alpha() 0 7 2
B get_random_string() 0 54 10
A fix_utf8() 0 30 4
A css_identifier() 0 55 6
A css_stripe_media_queries() 0 6 1
A clean() 0 47 6
A is_serialized() 0 11 3
A is_uppercase() 0 7 2
A is_ascii() 0 3 1
A is_blank() 0 7 2
A htmlspecialchars() 0 15 3
A __construct() 0 2 1
A decimal_to_chr() 0 3 1
A has_whitespace() 0 7 2
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A remove_right() 0 25 4
A char_at() 0 7 2
A chars() 0 4 1
B is_binary() 0 37 9
A intlChar_loaded() 0 3 1
B is_url() 0 40 7
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
A chr_size_list() 0 17 3
A json_loaded() 0 3 1
A is_lowercase() 0 7 2
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A is_empty() 0 3 1
B html_encode() 0 54 11
F encode() 0 144 37
C is_utf32() 0 71 16
A is_alphanumeric() 0 7 2
A json_decode() 0 13 2
A checkForSupport() 0 46 4
B is_json() 0 26 8
A is_printable() 0 3 1
A int_to_hex() 0 7 2
A has_lowercase() 0 7 2
A json_encode() 0 9 2
A is_base64() 0 17 5
A hex_to_int() 0 14 3
A hex_to_chr() 0 4 1
A htmlentities() 0 28 3
A getSupportInfo() 0 13 3
A chr_to_hex() 0 11 3
A is_punctuation() 0 3 1
A collapse_whitespace() 0 7 2
C html_entity_decode() 0 58 13
A access() 0 11 4
B file_get_contents() 0 56 11
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A emoji_from_country_code() 0 17 3
A strncasecmp() 0 10 1
D getCharDirection() 0 104 117
A fix_simple_utf8() 0 32 5
A array_change_key_case() 0 23 5
A str_substr_after_first_separator() 0 28 6
A max() 0 14 3
B str_camelize() 0 74 10
A parse_str() 0 18 4
A str_contains() 0 15 3
B str_to_lines() 0 28 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A get_unique_string() 0 21 3
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
A count_chars() 0 11 1
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 25 6
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
C utf8_decode() 0 59 13
A ltrim() 0 26 5
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 70 14
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A html_escape() 0 6 1
A string() 0 16 4
B str_obfuscate() 0 47 8
D normalize_encoding() 0 147 16
B rxClass() 0 44 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 170 7
A normalize_whitespace() 0 11 1
A str_starts_with() 0 16 4
A str_humanize() 0 15 1
C substr_count_in_byte() 0 55 15
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 26 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 35 5
A getData() 0 6 1
B strtolower() 0 58 10
A urldecode() 0 35 4
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 123 27
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A str_split_array() 0 17 2
A ws() 0 3 1
A str_replace_first() 0 20 2
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A to_boolean() 0 35 5
C stristr() 0 79 17
B strwidth() 0 43 8
A trim() 0 26 5
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 12 3
A str_ireplace() 0 31 5
A str_replace_ending() 0 24 6
A string_has_bom() 0 9 3
B strtr() 0 41 11
B str_contains_all() 0 22 9
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 71 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A rawurldecode() 0 35 4
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 14 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A str_replace() 0 18 1
A substr_iright() 0 15 4
A replace() 0 11 2
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
B between() 0 48 8
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 134 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
F strrpos() 0 136 31
A remove_html_breaks() 0 3 1
A showSupport() 0 16 3
A remove_invisible_characters() 0 11 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
A tabs_to_spaces() 0 11 3
B str_truncate() 0 43 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 16 4
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 12 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
B str_snakeize() 0 57 6
A str_sort() 0 16 3
A to_utf8() 0 15 3
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 51 11
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
C ord() 0 68 16
B to_string() 0 27 8
A strtonatfold() 0 11 2
C strcspn() 0 48 12
A fixStrCaseHelper() 0 41 5
C str_split_pattern() 0 54 13
D strstr() 0 107 21
A str_isubstr_first() 0 25 4
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 137 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A replace_diamond_question_mark() 0 43 5
D is_utf8_string() 0 133 28
B str_delimit() 0 31 8
A to_utf8_convert_helper() 0 28 5
B strtoupper() 0 58 10
A min() 0 14 3
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 28 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 151 33
A str_shuffle() 0 33 6
A strcmp() 0 11 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
     * Bom => Byte-Length
     *
     * Each key is a byte-order-mark sequence and each value is the length of
     * that key in bytes. Every BOM is listed twice: once as raw bytes and once
     * as the UTF-8 text produced when the raw bytes were read as "WINDOWS-1252".
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * NOTE(review): the two UTF-32 "WINDOWS-1252" keys are reproduced with two
     * ordinary spaces, exactly as they appear in this listing. A misdecoded
     * UTF-32 BOM would carry two NUL bytes in that position - confirm against
     * the canonical file.
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" ("ï»¿", one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
31
32
    /**
     * Numeric code point => UTF-8 Character
     *
     * Lookup table of whitespace-like characters, keyed by decimal code point;
     * each value is the UTF-8 byte sequence of that character.
     *
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
     *
     * @var array<int, string>
     */
    private static $WHITESPACE = [
        // NULL Byte
        0 => "\x0",
        // Tab
        9 => "\x9",
        // New Line
        10 => "\xa",
        // Vertical Tab
        11 => "\xb",
        // Carriage Return
        13 => "\xd",
        // Ordinary Space
        32 => "\x20",
        // NO-BREAK SPACE
        160 => "\xc2\xa0",
        // OGHAM SPACE MARK
        5760 => "\xe1\x9a\x80",
        // MONGOLIAN VOWEL SEPARATOR
        6158 => "\xe1\xa0\x8e",
        // EN QUAD
        8192 => "\xe2\x80\x80",
        // EM QUAD
        8193 => "\xe2\x80\x81",
        // EN SPACE
        8194 => "\xe2\x80\x82",
        // EM SPACE
        8195 => "\xe2\x80\x83",
        // THREE-PER-EM SPACE
        8196 => "\xe2\x80\x84",
        // FOUR-PER-EM SPACE
        8197 => "\xe2\x80\x85",
        // SIX-PER-EM SPACE
        8198 => "\xe2\x80\x86",
        // FIGURE SPACE
        8199 => "\xe2\x80\x87",
        // PUNCTUATION SPACE
        8200 => "\xe2\x80\x88",
        // THIN SPACE
        8201 => "\xe2\x80\x89",
        // HAIR SPACE
        8202 => "\xe2\x80\x8a",
        // LINE SEPARATOR
        8232 => "\xe2\x80\xa8",
        // PARAGRAPH SEPARATOR
        8233 => "\xe2\x80\xa9",
        // NARROW NO-BREAK SPACE
        8239 => "\xe2\x80\xaf",
        // MEDIUM MATHEMATICAL SPACE
        8287 => "\xe2\x81\x9f",
        // HALFWIDTH HANGUL FILLER
        65440 => "\xef\xbe\xa0",
        // IDEOGRAPHIC SPACE
        12288 => "\xe3\x80\x80",
    ];
93
94
    /**
     * Character name => UTF-8 character
     *
     * Space-like characters keyed by an upper-case descriptive name; each
     * value is the UTF-8 byte sequence of that character.
     *
     * @var array<string, string>
     */
    private static $WHITESPACE_TABLE = [
        'SPACE'                     => "\x20",
        'NO-BREAK SPACE'            => "\xc2\xa0",
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
        'EN QUAD'                   => "\xe2\x80\x80",
        'EM QUAD'                   => "\xe2\x80\x81",
        'EN SPACE'                  => "\xe2\x80\x82",
        'EM SPACE'                  => "\xe2\x80\x83",
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
        'FIGURE SPACE'              => "\xe2\x80\x87",
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
        'THIN SPACE'                => "\xe2\x80\x89",
        'HAIR SPACE'                => "\xe2\x80\x8a",
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
    ];
120
121
    /**
     * Two parallel lists of characters: the entry at a given position in
     * 'upper' corresponds to the entry at the same position in 'lower'.
     *
     * NOTE(review): presumably used as search/replace pairs for case folding -
     * the consuming code is not part of this excerpt, verify before reordering.
     *
     * @var array
     *
     * @phpstan-var array{upper: string[], lower: string[]}
     */
    private static $COMMON_CASE_FOLD = [
        'upper' => [
            'µ',
            'ſ',
            "\xCD\x85",
            'ς',
            'ẞ',
            "\xCF\x90",
            "\xCF\x91",
            "\xCF\x95",
            "\xCF\x96",
            "\xCF\xB0",
            "\xCF\xB1",
            "\xCF\xB5",
            "\xE1\xBA\x9B",
            "\xE1\xBE\xBE",
        ],
        'lower' => [
            'μ',
            's',
            'ι',
            'σ',
            'ß',
            'β',
            'θ',
            'φ',
            'π',
            'κ',
            'ρ',
            'ε',
            "\xE1\xB9\xA1",
            'ι',
        ],
    ];
160
161
    /**
     * Environment feature flags, filled in by UTF8::checkForSupport()
     * (e.g. 'mbstring', 'iconv', 'intl', 'intlChar', 'ctype', 'finfo', 'json').
     *
     * @var array
     *
     * @phpstan-var array<string, mixed>
     */
    private static $SUPPORT = [];

    /**
     * Null until populated.
     *
     * @var string[]|null
     *
     * @phpstan-var array<string, string>|null
     */
    private static $BROKEN_UTF8_FIX;

    /**
     * Null until populated.
     *
     * @var string[]|null
     *
     * @phpstan-var array<int, string>|null
     */
    private static $WIN1252_TO_UTF8;

    /**
     * Null until populated.
     *
     * @var string[]|null
     *
     * @phpstan-var array<int, string>|null
     */
    private static $INTL_TRANSLITERATOR_LIST;

    /**
     * Null until populated.
     *
     * @var string[]|null
     *
     * @phpstan-var array<string>|null
     */
    private static $ENCODINGS;

    /**
     * Null until populated.
     *
     * @var int[]|null
     *
     * @phpstan-var array<string, int>|null
     */
    private static $ORD;

    /**
     * Null until populated.
     *
     * @var string[]|null
     *
     * @phpstan-var array<string, string>|null
     */
    private static $EMOJI;

    /**
     * Null until populated.
     *
     * @var string[]|null
     *
     * @phpstan-var array<string>|null
     */
    private static $EMOJI_VALUES_CACHE;

    /**
     * Null until populated.
     *
     * @var string[]|null
     *
     * @phpstan-var array<string>|null
     */
    private static $EMOJI_KEYS_CACHE;

    /**
     * Null until populated.
     *
     * @var string[]|null
     *
     * @phpstan-var array<string>|null
     */
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;

    /**
     * Byte value => single-byte string table; loaded on first use by
     * UTF8::chr() through self::getData('chr').
     *
     * @var string[]|null
     *
     * @phpstan-var array<int, string>|null
     */
    private static $CHR;
237
238
    /**
     * __construct()
     *
     * Intentionally empty: there is nothing to initialise per instance.
     */
    public function __construct()
    {
    }
244
245
    /**
     * Return the character at the specified position: $str[1] like functionality.
     *
     * An empty input string or a negative position yields an empty string.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $pos < 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: no encoding argument is passed, so the mbstring
            // internal encoding applies
            return (string) \mb_substr($str, $pos, 1);
        }

        return (string) self::substr($str, $pos, 1, $encoding);
    }
271
272
    /**
     * Prepends UTF-8 BOM character to the string and returns the whole string.
     *
     * INFO: If BOM already existed there, the Input string is returned.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The output string that contains BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        // only prepend when string_has_bom() reports no BOM yet
        if (!self::string_has_bom($str)) {
            $str = self::bom() . $str;
        }

        return $str;
    }
294
295
    /**
     * Changes all keys in an array.
     *
     * Every key is passed through UTF8::strtolower() or UTF8::strtoupper();
     * the values are copied unchanged. If two keys become identical after the
     * conversion, the later entry overwrites the earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default); any other
     *                                       value is treated as <strong>CASE_LOWER</strong></p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>
     *                              <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: nothing is written back into $array, so a
        // reference would only leave a dangling alias behind
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower($key, $encoding)
                : self::strtoupper($key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
332
333
    /**
     * Returns the substring between $start and $end, if found, or an empty
     * string. An optional offset may be supplied from which to begin the
     * search for the start string.
     *
     * An empty string is also returned when $end follows $start immediately.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The text between the two delimiters, or an empty string.</p>
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // fast path: plain mbstring calls, no encoding normalisation
            $start_position = \mb_strpos($str, $start, $offset);
            if ($start_position === false) {
                return '';
            }

            // first character after the start delimiter
            $substr_index = $start_position + (int) \mb_strlen($start);
            $end_position = \mb_strpos($str, $end, $substr_index);
            if (
                $end_position === false
                ||
                $end_position === $substr_index
            ) {
                return '';
            }

            return (string) \mb_substr($str, $substr_index, $end_position - $substr_index);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $start_position = self::strpos($str, $start, $offset, $encoding);
        if ($start_position === false) {
            return '';
        }

        // first character after the start delimiter
        $substr_index = $start_position + (int) self::strlen($start, $encoding);
        $end_position = self::strpos($str, $end, $substr_index, $encoding);
        if (
            $end_position === false
            ||
            $end_position === $substr_index
        ) {
            return '';
        }

        return (string) self::substr(
            $str,
            $substr_index,
            $end_position - $substr_index,
            $encoding
        );
    }
398
399
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * The input is read as one big binary number: leading zero bits are
     * ignored, the remaining bits are left-padded to whole octets and each
     * octet becomes one byte of the result. Characters other than "0" and "1"
     * are ignored. A non-string, empty or all-zero input yields an empty string.
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // keep binary digits only and drop leading zeros
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to a multiple of 8 so every chunk is a complete octet;
        // this avoids both the float precision limit of base_convert() and
        // the mis-packing of odd-length hex strings by pack('H*')
        $padding = (8 - (\strlen($bits) % 8)) % 8;
        $bits = \str_repeat('0', $padding) . $bits;

        $bytes = '';
        foreach (\str_split($bits, 8) as $octet) {
            $bytes .= \chr((int) \bindec($octet));
        }

        return $bytes;
    }
425
426
    /**
     * Returns the UTF-8 Byte Order Mark Character.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark.</p>
     */
    public static function bom(): string
    {
        return "\xef\xbb\xbf";
    }
442
443
    /**
     * @alias of UTF8::chr_map()
     *
     * Forwards both arguments unchanged to UTF8::chr_map() and returns its result.
     *
     * @param callable $callback
     * @param string   $str
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        return self::chr_map($callback, $str);
    }
459
460
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * $index is handed to the substring function as-is; no range check is
     * done here.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if ($encoding === 'UTF-8') {
            // fast path: no encoding argument is passed, so the mbstring
            // internal encoding applies
            return (string) \mb_substr($str, $index, 1);
        }

        return (string) self::substr($str, $index, 1, $encoding);
    }
480
481
    /**
     * Returns an array consisting of the characters in the string.
     *
     * Thin wrapper around UTF8::str_split() called with its default arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        // the annotation narrows the declared result of str_split() for
        // static analysis
        /** @var string[] */
        return self::str_split($str);
    }
496
497
    /**
     * This method will auto-detect your server environment for UTF-8 support.
     *
     * The detection runs once: the first call fills self::$SUPPORT and returns
     * true, every later call returns null without doing anything.
     *
     * When mbstring is loaded (or the symfony polyfill is in use) the mbstring
     * internal encoding is switched to UTF-8 as a side effect.
     *
     * @return true|null
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::$SUPPORT['already_checked_via_portable_utf8'] = true;

            // http://php.net/manual/en/book.mbstring.php
            self::$SUPPORT['mbstring'] = self::mbstring_loaded();

            self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
            if (self::$SUPPORT['mbstring'] === true) {
                \mb_internal_encoding('UTF-8');
                \mb_regex_encoding('UTF-8');
                self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
            }

            // http://php.net/manual/en/book.iconv.php
            self::$SUPPORT['iconv'] = self::iconv_loaded();

            // http://php.net/manual/en/book.intl.php
            self::$SUPPORT['intl'] = self::intl_loaded();

            // http://php.net/manual/en/class.intlchar.php
            self::$SUPPORT['intlChar'] = self::intlChar_loaded();

            // http://php.net/manual/en/book.ctype.php
            self::$SUPPORT['ctype'] = self::ctype_loaded();

            // http://php.net/manual/en/class.finfo.php
            self::$SUPPORT['finfo'] = self::finfo_loaded();

            // http://php.net/manual/en/book.json.php
            self::$SUPPORT['json'] = self::json_loaded();

            // http://php.net/manual/en/book.pcre.php
            self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

            self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
            if (self::$SUPPORT['symfony_polyfill_used'] === true) {
                \mb_internal_encoding('UTF-8');
                self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
            }

            return true;
        }

        return null;
    }
551
552
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoised per "code point + encoding" in a function-static
     * cache. Code points below U+0080 come straight from the byte table; higher
     * ones are built by IntlChar when available, otherwise by hand.
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null on failure or empty input
     *                     (non-integer, zero, negative or above U+10FFFF).</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // the parameter is untyped, so the integer check is a real runtime
        // guard; values above U+10FFFF are not valid Unicode code points
        if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // only for "simple"-chars: the byte table holds raw single bytes, which
        // are valid UTF-8 only below 0x80 (U+0080 itself needs two bytes)
        if ($code_point < 0x80) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $chr = \IntlChar::chr($code_point);
            if ($chr === null) {
                // IntlChar rejected the code point
                return null;
            }

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if ($code_point <= 0x7FF) {
            // two bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
675
676
    /**
     * Applies callback to all characters of a string.
     *
     * The string is split with UTF8::str_split() and the callback is invoked
     * once per resulting element via array_map().
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        return \array_map(
            $callback,
            self::str_split($str)
        );
    }
696
697
    /**
     * Generates an array of byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An array of byte lengths of each character; empty for an empty string.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // with function overloading, strlen() would not count bytes,
            // so measure each character in an 8-bit encoding instead
            return \array_map(
                static function (string $data): int {
                    // "mb_" is available if overload is used, so use it ...
                    return \mb_strlen($data, 'CP850'); // 8-BIT
                },
                self::str_split($str)
            );
        }

        return \array_map('\strlen', self::str_split($str));
    }
732
733
    /**
734
     * Get a decimal code representation of a specific character.
735
     *
736
     * INFO: opposite to UTF8::decimal_to_chr()
737
     *
738
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
739
     *
740
     * @param string $char <p>The input character.</p>
741
     *
742
     * @psalm-pure
743
     *
744
     * @return int
745
     */
746 5
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no first character: "unpack()" would fail on the empty
        // iconv result (breaking the "int" return type) and the manual decoder below
        // would read the undefined offset "$char[0]". Report 0 instead.
        // NOTE(review): 0 mirrors how chr_to_hex() feeds '' into self::ord() for "&#0;" - confirm.
        if ($char === '') {
            return 0;
        }

        // Preferred path: let iconv convert to little-endian UCS-4 and read the first 32-bit unit.
        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                /** @phpstan-ignore-next-line - "unpack": only false if the format string contains errors */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        // Fallback: decode the UTF-8 sequence by hand, starting with the lead byte.
        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // The lead byte announces the sequence length; strip its marker bits.
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            // A truncated sequence has no byte at this offset; "?? ''" yields the same
            // empty string PHP would produce, but without the "uninitialized offset" warning.
            $code = ($code << 6) + (self::ord($char[$i - 1] ?? '') & ~0x80);
        }

        return $code;
    }
785
786
    /**
787
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
788
     *
789
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
790
     *
791
     * @param int|string $char   <p>The input character</p>
792
     * @param string     $prefix [optional]
793
     *
794
     * @psalm-pure
795
     *
796
     * @return string
797
     *                <p>The code point encoded as U+xxxx.</p>
798
     */
799 2
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        // Only the empty *string* short-circuits (strict comparison).
        if ($char === '') {
            return '';
        }

        // The HTML entity for NUL is passed on as an empty string,
        // everything else is cast to string as-is.
        $character = $char === '&#0;' ? '' : (string) $char;

        $code_point = self::ord($character);

        return self::int_to_hex($code_point, $prefix);
    }
811
812
    /**
813
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
814
     *
815
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
816
     *
817
     * @param string $body         <p>The original string to be split.</p>
818
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
819
     * @param string $end          [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
820
     *
821
     * @psalm-pure
822
     *
823
     * @return string
824
     *                <p>The chunked string.</p>
825
     */
826 4
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        // Cut the body into pieces via str_split(), passing the chunk length along ...
        $chunks = self::str_split($body, $chunk_length);

        // ... and glue them together; "implode" puts $end between chunks only.
        return \implode($end, $chunks);
    }
830
831
    /**
832
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
833
     *
834
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
835
     *
836
     * @param string $str                                     <p>The string to be sanitized.</p>
837
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
838
     *                                                        UTF-BOM.</p>
839
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
840
     *                                                        whitespace.</p>
841
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
842
     *                                                        Word chars e.g.: "…"
843
     *                                                        => "..."</p>
844
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
845
     *                                                        in
846
     *                                                        combination with
847
     *                                                        $normalize_whitespace</p>
848
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
849
     *                                                        question mark e.g.: "�"</p>
850
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
851
     *                                                        invisible characters e.g.: "\0"</p>
852
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you want to remove
853
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
854
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
855
     *                                                        </p>
856
     *
857
     * @psalm-pure
858
     *
859
     * @return string
860
     *                <p>An clean UTF-8 encoded string.</p>
861
     */
862 90
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // Pattern source: http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // The original note mentions "connection reset" problems on larger strings
        // (presumably the reason the repetition is capped at 100 here).
        //
        // Group 1 captures runs of well-formed byte sequences; groups 2 and 3 capture
        // single stray bytes. Replacing every match with "$1" keeps the well-formed
        // runs and drops the stray bytes.
        $byte_sequence_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $filtered = \preg_replace($byte_sequence_pattern, '$1', $str);
        // "preg_replace" yields null on error; the cast turns that into an empty string.
        $str = (string) $filtered;

        // The optional steps run in this fixed order.
        $str = $replace_diamond_question_mark
            ? self::replace_diamond_question_mark($str)
            : $str;

        $str = $remove_invisible_characters
            ? self::remove_invisible_characters($str, $remove_invisible_characters_url_encoded)
            : $str;

        $str = $normalize_whitespace
            ? self::normalize_whitespace($str, $keep_non_breaking_space)
            : $str;

        $str = $normalize_msword
            ? self::normalize_msword($str)
            : $str;

        return $remove_bom
            ? self::remove_bom($str)
            : $str;
    }
910
911
    /**
912
     * Clean-up a string and show only printable UTF-8 chars at the end  + fix UTF-8 encoding.
913
     *
914
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
915
     *
916
     * @param string $str <p>The input string.</p>
917
     *
918
     * @psalm-pure
919
     *
920
     * @return string
921
     */
922 33
    public static function cleanup($str): string
    {
        // The parameter is untyped, so normalize it to a string first.
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // Repair ISO <-> UTF-8 errors before cleaning.
        $repaired = self::fix_simple_utf8($input);

        // Delegate to clean(); the trailing parameters keep their defaults
        // (which includes removing invisible characters such as "\0").
        return self::clean(
            $repaired,
            true,  // $remove_bom
            true,  // $normalize_whitespace
            false, // $normalize_msword
            true,  // $keep_non_breaking_space
            true   // $replace_diamond_question_mark
        );
    }
948
949
    /**
950
     * Accepts a string or a array of strings and returns an array of Unicode code points.
951
     *
952
     * INFO: opposite to UTF8::string()
953
     *
954
     * EXAMPLE: <code>
955
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
956
     * // ... OR ...
957
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
958
     * </code>
959
     *
960
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
961
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
962
     *                                     default, code points will be returned as integers.</p>
963
     *
964
     * @psalm-pure
965
     *
966
     * @return int[]|string[]
967
     *                        <p>
968
     *                        The array of code points:<br>
969
     *                        int[] for $use_u_style === false<br>
970
     *                        string[] for $use_u_style === true<br>
971
     *                        </p>
972
     */
973 12
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A string is first split into its characters, anything else is taken as given.
        $characters = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * Neither a string nor an array was passed, or there is nothing to convert.
         *
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($characters) || $characters === []) {
            return [];
        }

        // Map every entry to its code point (array keys are preserved by "array_map").
        $code_points = \array_map([self::class, 'ord'], $characters);

        if (!$use_u_style) {
            return $code_points;
        }

        // Optionally render each code point via int_to_hex() (the "U+xxxx" form).
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1010
1011
    /**
1012
     * Trims the string and replaces consecutive whitespace characters with a
1013
     * single space. This includes tabs and newline characters, as well as
1014
     * multibyte whitespace such as the thin space and ideographic space.
1015
     *
1016
     * @param string $str <p>The input string.</p>
1017
     *
1018
     * @psalm-pure
1019
     *
1020
     * @return string
1021
     *                <p>A string with trimmed $str and condensed whitespace.</p>
1022
     */
1023 13
    public static function collapse_whitespace(string $str): string
    {
        // Without mbstring the class' own regex helper does the replacement.
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        // With mbstring: squeeze every run of whitespace into one space, then trim.
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($collapsed);
    }
1031
1032
    /**
1033
     * Returns count of characters used in a string.
1034
     *
1035
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
1036
     *
1037
     * @param string $str                     <p>The input string.</p>
1038
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
1039
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use "mb_" functions.</p>
1040
     *
1041
     * @psalm-pure
1042
     *
1043
     * @return int[]
1044
     *               <p>An associative array of Character as keys and
1045
     *               their count as values.</p>
1046
     */
1047 19
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split into chunks of length 1, passing both flags through to str_split() ...
        $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // ... and count how often each chunk occurs (chunk => count).
        return \array_count_values($characters);
    }
1061
1062
    /**
1063
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
1064
     *
1065
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
1066
     *
1067
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
1068
     *
1069
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
1070
     * @param string[] $filter
1071
     * @param bool     $strip_tags
1072
     * @param bool     $strtolower
1073
     *
1074
     * @psalm-pure
1075
     *
1076
     * @return string
1077
     *
1078
     * @phpstan-param array<string,string> $filter
1079
     */
1080 1
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank identifier is replaced by a generated one,
        // anything else goes through clean() first.
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // str_replace() is used instead of strtr() because the latter is much slower.
        // To keep '__' intact it is parked behind the placeholder '##' while the
        // filter runs - unless the filter itself defines a rule for '__'.
        $placeholder_count = 0;
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $placeholder_count);
        }

        $search = \array_keys($filter);
        $replace = \array_values($filter);
        $str = \str_replace($search, $replace, $str);

        // Restore '__' only when the placeholder was really inserted above.
        if ($placeholder_count > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Characters kept by the pattern below:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - characters from U+00A1 up to U+FFFF
        // Everything else is stripped.
        $allowed_only = \preg_replace(
            '/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u',
            '',
            $str
        );
        $str = (string) $allowed_only;

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit:
        // a leading digit becomes '_' first, then a leading "-<digit>" or "--" becomes '__'.
        $str = (string) \preg_replace('/^[0-9]/', '_', $str);
        $str = (string) \preg_replace('/^(-[0-9])|^(--)/', '__', $str);

        // Finally drop hyphens from both ends.
        return \trim($str, '-');
    }
1136
1137
    /**
1138
     * Remove css media-queries.
1139
     *
1140
     * @param string $str
1141
     *
1142
     * @psalm-pure
1143
     *
1144
     * @return string
1145
     */
1146 1
    public static function css_stripe_media_queries(string $str): string
    {
        // Matches an "@media ..." rule including its nested block; the modifiers make the
        // pattern case-insensitive (i), dot-all (s), UTF-8 aware (u), multi-line (m)
        // and ungreedy by default (U).
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        // "preg_replace" yields null on error; the cast turns that into an empty string.
        return (string) $without_media_queries;
    }
1154
1155
    /**
1156
     * Checks whether ctype is available on the server.
1157
     *
1158
     * @psalm-pure
1159
     *
1160
     * @return bool
1161
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
1162
     *
1163
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
1164
     */
1165
    public static function ctype_loaded(): bool
    {
        // Ask PHP directly whether the "ctype" extension is loaded.
        $ctype_available = \extension_loaded('ctype');

        return $ctype_available;
    }
1169
1170
    /**
1171
     * Converts an int value into a UTF-8 character.
1172
     *
1173
     * INFO: opposite to UTF8::chr_to_decimal()
1174
     *
1175
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
1176
     *
1177
     * @param int|string $int
1178
     *
1179
     * @phpstan-param int|numeric-string $int
1180
     *
1181
     * @psalm-pure
1182
     *
1183
     * @return string
1184
     */
1185 20
    public static function decimal_to_chr($int): string
    {
        // Wrap the number in a numeric HTML entity (e.g. "&#931;") ...
        $entity = '&#' . $int . ';';

        // ... and let the entity decoder turn it into the character.
        $flags = \ENT_QUOTES | \ENT_HTML5;

        return self::html_entity_decode($entity, $flags);
    }
1189
1190
    /**
1191
     * Decodes a MIME header field
1192
     *
1193
     * @param string $str
1194
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1195
     *
1196
     * @psalm-pure
1197
     *
1198
     * @return false|string
1199
     *                      <p>A decoded MIME field on success,
1200
     *                      or false if an error occurs during the decoding.</p>
1201
     */
1202 2
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are used as given, every other name is normalized first.
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        $decoded = \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);

        return $decoded;
    }
1211
1212
    /**
1213
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
1214
     *
1215
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
1216
     *
1217
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
1218
     *
1219
     * @return string
1220
     *                <p>Emoji or empty string on error.</p>
1221
     */
1222 1
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only exactly two ASCII letters can be mapped to a flag. Everything else
        // (empty input, wrong length, digits, multi-byte characters) is an error and
        // yields '' as documented, instead of garbage code points built from raw bytes.
        // "\A...\z" is used so that a trailing newline is not accepted.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code = \strtoupper($country_code_iso_3166_1);

        // Each letter is shifted from its ASCII position (relative to 'A') into the
        // block that starts at U+1F1E6 (REGIONAL INDICATOR SYMBOL LETTER A).
        $flag_offset = 0x1F1E6;
        $ascii_offset = 0x41;

        // self::chr() is null-coalesced because it may not yield a string.
        return (self::chr((self::ord($country_code[0]) - $ascii_offset + $flag_offset)) ?? '') .
               (self::chr((self::ord($country_code[1]) - $ascii_offset + $flag_offset)) ?? '');
    }
1240
1241
    /**
1242
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1243
     *
1244
     * INFO: opposite to UTF8::emoji_encode()
1245
     *
1246
     * EXAMPLE: <code>
1247
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
1248
     * //
1249
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
1250
     * </code>
1251
     *
1252
     * @param string $str                            <p>The input string.</p>
1253
     * @param bool   $use_reversible_string_mappings [optional] <p>
1254
     *                                               When <b>TRUE</b>, we use a reversible string mapping
1255
     *                                               between "emoji_encode" and "emoji_decode".</p>
1256
     *
1257
     * @psalm-pure
1258
     *
1259
     * @return string
1260
     */
1261 9
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Lazily load the emoji tables on first use.
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the key table that matches the mapping used while encoding.
        $encoded_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // Swap every encoded key for the corresponding entry of the value table.
        return (string) \str_replace(
            (array) $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1284
1285
    /**
1286
     * Encode a string with emoji chars into a non-emoji string.
1287
     *
1288
     * INFO: opposite to UTF8::emoji_decode()
1289
     *
1290
     * EXAMPLE: <code>
1291
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
1292
     * //
1293
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
1294
     * </code>
1295
     *
1296
     * @param string $str                            <p>The input string</p>
1297
     * @param bool   $use_reversible_string_mappings [optional] <p>
1298
     *                                               when <b>TRUE</b>, we use a reversible string mapping
1299
     *                                               between "emoji_encode" and "emoji_decode"</p>
1300
     *
1301
     * @psalm-pure
1302
     *
1303
     * @return string
1304
     */
1305 12
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Lazily load the emoji tables on first use.
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Choose the key table to encode into: reversible or plain.
        $target_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // Swap every entry of the value table for the corresponding key.
        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $target_keys,
            $str
        );
    }
1328
1329
    /**
1330
     * Encode a string with a new charset-encoding.
1331
     *
1332
     * INFO:  This function will also try to fix broken / double encoding,
1333
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
1334
     *
1335
     * EXAMPLE: <code>
1336
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
1337
     * //
1338
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
1339
     * //
1340
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
1341
     * //
1342
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
1343
     * </code>
1344
     *
1345
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1346
     * @param string $str                           <p>The input string</p>
1347
     * @param bool   $auto_detect_the_from_encoding [optional] <p>Force the new encoding (we try to fix broken / double
1348
     *                                              encoding for UTF-8)<br> otherwise we auto-detect the current
1349
     *                                              string-encoding</p>
1350
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1351
     *                                              A empty string will trigger the autodetect anyway.</p>
1352
     *
1353
     * @psalm-pure
1354
     *
1355
     * @return string
1356
     *
1357
     * @psalm-suppress InvalidReturnStatement
1358
     */
1359 28
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // Without input or without a target encoding the string is handed back untouched.
        if ($to_encoding === '' || $str === '') {
            return $str;
        }

        // "UTF-8" and "CP850" are used as given, every other encoding name is normalized first.
        $used_as_given = ['UTF-8', 'CP850'];

        if (!\in_array($to_encoding, $used_as_given, true)) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && !\in_array($from_encoding, $used_as_given, true)) {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Source and target are the same (non-empty) encoding: nothing to do.
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // Pseudo encodings "JSON", "BASE64" and "HTML-ENTITIES": for each of them the
        // target is checked before the source, so the order of these blocks matters.
        // A matching source is decoded and then cleared, so that the real source
        // encoding is determined further below.

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }

        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }

        if ($from_encoding === 'BASE64') {
            // NOTE(review): in strict mode "base64_decode" returns false for invalid input;
            // that value is passed on unchanged - confirm this is intended.
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }

        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // Detect the source encoding when asked to, or when none is known.
        $detected_encoding = false;
        if ($auto_detect_the_from_encoding || !$from_encoding) {
            $detected_encoding = self::str_detect_encoding($str);
        }

        if ($detected_encoding !== false) {
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        // Still no source encoding, or it already equals the target.
        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // Dedicated converters for the two most common directions.
        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // Any other target needs mbstring: warn when it is missing, but carry on with iconv.
        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // A falsy result falls through to the iconv attempt below.
            if ($converted) {
                \assert(\is_string($converted));

                return $converted;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $iconv_result = @\iconv($from_encoding, $to_encoding, $str);

        // When iconv fails as well, the string is returned as it is.
        return $iconv_result !== false ? $iconv_result : $str;
    }
1504
1505
    /**
1506
     * @param string $str
1507
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
1508
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
1509
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
1510
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
1511
     * @param int    $indent            [optional] <p>Set the max length indent.</p>
1512
     *
1513
     * @psalm-pure
1514
     *
1515
     * @return false|string
1516
     *                      <p>An encoded MIME field on success,
1517
     *                      or false if an error occurs during the encoding.</p>
1518
     */
1519 1
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as given, every other charset name is normalized first.
        $used_as_given = ['UTF-8', 'CP850'];

        if (!\in_array($from_charset, $used_as_given, true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, $used_as_given, true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        // Options for "iconv_mime_encode"; $indent is passed as the line length.
        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        // The field name is left empty; only the value is supplied.
        return \iconv_mime_encode('', $str, $preferences);
    }
1548
1549
    /**
1550
     * Create an extract from a sentence; if the search-string is found, it tries to center it in the output.
1551
     *
1552
     * @param string   $str                       <p>The input string.</p>
1553
     * @param string   $search                    <p>The searched string.</p>
1554
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
1555
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
1556
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
1557
     *
1558
     * @psalm-pure
1559
     *
1560
     * @return string
1561
     */
1562 1
    public static function extract_text(
1563
        string $str,
1564
        string $search = '',
1565
        int $length = null,
1566
        string $replacer_for_skipped_text = '…',
1567
        string $encoding = 'UTF-8'
1568
    ): string {
1569 1
        if ($str === '') {
1570 1
            return '';
1571
        }
1572
1573 1
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
1574
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
1575
        }
1576
1577 1
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";
1578
1579 1
        if ($length === null) {
1580 1
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
1581
        }
1582
1583 1
        if ($search === '') {
1584 1
            if ($encoding === 'UTF-8') {
1585 1
                if ($length > 0) {
1586 1
                    $string_length = (int) \mb_strlen($str);
1587 1
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
1588
                } else {
1589 1
                    $end = 0;
1590
                }
1591
1592 1
                $pos = (int) \min(
1593 1
                    \mb_strpos($str, ' ', $end),
1594 1
                    \mb_strpos($str, '.', $end)
1595
                );
1596
            } else {
1597
                if ($length > 0) {
1598
                    $string_length = (int) self::strlen($str, $encoding);
1599
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
1600
                } else {
1601
                    $end = 0;
1602
                }
1603
1604
                $pos = (int) \min(
1605
                    self::strpos($str, ' ', $end, $encoding),
1606
                    self::strpos($str, '.', $end, $encoding)
1607
                );
1608
            }
1609
1610 1
            if ($pos) {
1611 1
                if ($encoding === 'UTF-8') {
1612 1
                    $str_sub = \mb_substr($str, 0, $pos);
1613
                } else {
1614
                    $str_sub = self::substr($str, 0, $pos, $encoding);
1615
                }
1616
1617 1
                if ($str_sub === false) {
1618
                    return '';
1619
                }
1620
1621 1
                return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1622
            }
1623
1624
            return $str;
1625
        }
1626
1627 1
        if ($encoding === 'UTF-8') {
1628 1
            $word_position = (int) \mb_stripos($str, $search);
1629 1
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
1630
        } else {
1631
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
1632
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
1633
        }
1634
1635 1
        $pos_start = 0;
1636 1
        if ($half_side > 0) {
1637 1
            if ($encoding === 'UTF-8') {
1638 1
                $half_text = \mb_substr($str, 0, $half_side);
1639
            } else {
1640
                $half_text = self::substr($str, 0, $half_side, $encoding);
1641
            }
1642 1
            if ($half_text !== false) {
1643 1
                if ($encoding === 'UTF-8') {
1644 1
                    $pos_start = (int) \max(
1645 1
                        \mb_strrpos($half_text, ' '),
1646 1
                        \mb_strrpos($half_text, '.')
1647
                    );
1648
                } else {
1649
                    $pos_start = (int) \max(
1650
                        self::strrpos($half_text, ' ', 0, $encoding),
1651
                        self::strrpos($half_text, '.', 0, $encoding)
1652
                    );
1653
                }
1654
            }
1655
        }
1656
1657 1
        if ($word_position && $half_side > 0) {
1658 1
            $offset = $pos_start + $length - 1;
1659 1
            $real_length = (int) self::strlen($str, $encoding);
1660
1661 1
            if ($offset > $real_length) {
1662
                $offset = $real_length;
1663
            }
1664
1665 1
            if ($encoding === 'UTF-8') {
1666 1
                $pos_end = (int) \min(
1667 1
                    \mb_strpos($str, ' ', $offset),
1668 1
                    \mb_strpos($str, '.', $offset)
1669 1
                ) - $pos_start;
1670
            } else {
1671
                $pos_end = (int) \min(
1672
                    self::strpos($str, ' ', $offset, $encoding),
1673
                    self::strpos($str, '.', $offset, $encoding)
1674
                ) - $pos_start;
1675
            }
1676
1677 1
            if (!$pos_end || $pos_end <= 0) {
1678 1
                if ($encoding === 'UTF-8') {
1679 1
                    $str_sub = \mb_substr($str, $pos_start, (int) \mb_strlen($str));
1680
                } else {
1681
                    $str_sub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
1682
                }
1683 1
                if ($str_sub !== false) {
1684 1
                    $extract = $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
1685
                } else {
1686 1
                    $extract = '';
1687
                }
1688
            } else {
1689 1
                if ($encoding === 'UTF-8') {
1690 1
                    $str_sub = \mb_substr($str, $pos_start, $pos_end);
1691
                } else {
1692
                    $str_sub = self::substr($str, $pos_start, $pos_end, $encoding);
1693
                }
1694 1
                if ($str_sub !== false) {
1695 1
                    $extract = $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1696
                } else {
1697 1
                    $extract = '';
1698
                }
1699
            }
1700
        } else {
1701 1
            $offset = $length - 1;
1702 1
            $true_length = (int) self::strlen($str, $encoding);
1703
1704 1
            if ($offset > $true_length) {
1705
                $offset = $true_length;
1706
            }
1707
1708 1
            if ($encoding === 'UTF-8') {
1709 1
                $pos_end = (int) \min(
1710 1
                    \mb_strpos($str, ' ', $offset),
1711 1
                    \mb_strpos($str, '.', $offset)
1712
                );
1713
            } else {
1714
                $pos_end = (int) \min(
1715
                    self::strpos($str, ' ', $offset, $encoding),
1716
                    self::strpos($str, '.', $offset, $encoding)
1717
                );
1718
            }
1719
1720 1
            if ($pos_end) {
1721 1
                if ($encoding === 'UTF-8') {
1722 1
                    $str_sub = \mb_substr($str, 0, $pos_end);
1723
                } else {
1724
                    $str_sub = self::substr($str, 0, $pos_end, $encoding);
1725
                }
1726 1
                if ($str_sub !== false) {
1727 1
                    $extract = \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1728
                } else {
1729 1
                    $extract = '';
1730
                }
1731
            } else {
1732 1
                $extract = $str;
1733
            }
1734
        }
1735
1736 1
        return $extract;
1737
    }
1738
1739
    /**
     * Reads an entire file into a string and, unless disabled, converts text content to UTF-8.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use the UTF-8 option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>Name of the file to read.</p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Whether the include path is searched for the file
     *                                        (passed through to the native function).
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with stream_context_create().
     *                                        If it is null and $timeout is not 0, a context carrying only
     *                                        the "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null is treated as 0).
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        An empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
        $filename = Bootup::filter_sanitize_string_polyfill($filename);
        if ($filename === false) {
            return false;
        }

        // Only build a context of our own when the caller did not supply one.
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        // The length argument is appended only when it was really given,
        // so that "read until EOF" stays the default.
        $read_arguments = [$filename, $use_include_path, $context, $offset ?? 0];
        if (\is_int($max_length)) {
            $read_arguments[] = $max_length;
        }

        $data = \file_get_contents(...$read_arguments);

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // Binary content is left alone, except when it is detected as UTF-16 / UTF-32.
        $is_convertible = !self::is_binary($data, true)
            || self::is_utf16($data, false) !== false
            || self::is_utf32($data, false) !== false;

        if ($is_convertible) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1839
1840
    /**
     * Tells whether the content of a file begins with a BOM (Byte Order Mark).
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            // The actual BOM detection is delegated to the string based check.
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1863
1864
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively, strings are normalized and
     * every other type is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var                <p>The value to normalize.</p>
     * @param int                 $normalization_form [optional] <p>One of the \Normalizer::* form constants.</p>
     * @param string              $leading_combining  [optional] <p>Prefix that is added in front of a string
     *                                                which starts with a combining mark.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            // Recurse into every element / property, by reference so the result is written back.
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // Pure ASCII needs neither normalization nor re-encoding.
            if (!ASCII::is_ascii($var)) {
                // '-' is a truthy marker for "the string was already normalized".
                $normalized = '-';

                if (!\Normalizer::isNormalized($var, $normalization_form)) {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // A failed / empty normalization result means the input is re-encoded instead.
                    $var = ($normalized && isset($normalized[0]))
                        ? $normalized
                        : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $starts_with_combining_mark = $normalized
                    && $var[0] >= "\x80"
                    && isset($normalized[0], $leading_combining[0])
                    && \preg_match('/^\\p{Mn}/u', $var);

                if ($starts_with_combining_mark) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1941
1942
    /**
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name and optionally filters it.
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>Name of a variable to get.</p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $without_options = $options === null || \func_num_args() < 4;

        // The options argument is only forwarded when the caller really supplied one.
        $value = $without_options
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($value);
    }
1997
1998
    /**
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables and optionally filters them.
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, ['foo' => FILTER_UNSAFE_RAW]); // ['foo' => 'bar']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int            $type       <p>
     *                                   One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                   <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                   <b>INPUT_ENV</b>.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a filter type, or an
     *                                   array optionally specifying the filter, flags and options. If the value is an
     *                                   array, valid keys are "filter" which specifies the filter type,
     *                                   "flags" which specifies any flags that apply to the
     *                                   filter, and "options" which specifies any options that
     *                                   apply to the filter.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can also be an integer holding a filter constant. Then all
     *                                   values in the input array are filtered by this filter.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $without_definition = $definition === null || \func_num_args() < 2;

        // Without a definition the native defaults are used and $add_empty is not forwarded.
        $values = $without_definition
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        return self::filter($values);
    }
2059
2060
    /**
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter.
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it. <b>NULL</b> (the default)
     *                                        means "no options" and is not forwarded to the native function.
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                            'options' => array(
     *                                                'default' => 3, // value to return if the filter fails
     *                                                // other options here
     *                                                'min_range' => 0
     *                                            ),
     *                                            'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                            array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                            // Expected format: Surname, GivenNames
     *                                            if (strpos($value, ", ") === false) return false;
     *                                            list($surname, $givennames) = explode(", ", $value, 2);
     *                                            $empty = (empty($surname) || empty($givennames));
     *                                            $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                            if ($empty || $notstrings) {
     *                                                return false;
     *                                            } else {
     *                                                return $value;
     *                                            }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            // The native filter_var() only accepts array|int as options, so an explicit
            // null must not be forwarded (same guard as in filter_input()).
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2139
2140
    /**
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables and optionally filters them.
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'user@example.com'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'user@example.com']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<mixed>   $data       <p>
     *                                   An array with string keys containing the data to filter.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a
     *                                   filter type, or an
     *                                   array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are "filter"
     *                                   which specifies the filter type,
     *                                   "flags" which specifies any flags that apply to the
     *                                   filter, and "options" which specifies any options that
     *                                   apply to the filter. See the example above for a better understanding.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can also be an integer holding a filter constant. Then all
     *                                   values in the input array are filtered by this filter. <b>NULL</b> (the
     *                                   default) means "use the native default" and is not forwarded.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set.
     *               </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition === null || \func_num_args() < 2) {
            // The native filter_var_array() only accepts array|int as definition, so an
            // explicit null must not be forwarded (same guard as in filter_input_array()).
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        return self::filter($a);
    }
2210
2211
    /**
     * Reports whether the "finfo" class exists in the current PHP runtime.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the class is available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function finfo_loaded(): bool
    {
        $finfo_available = \class_exists('finfo');

        return $finfo_available;
    }
2225
2226
    /**
     * Returns the leading $n characters of the given string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        $nothing_to_take = ($str === '' || $n <= 0);
        if ($nothing_to_take) {
            return '';
        }

        // "UTF-8" goes straight to mb_substr(); every other charset is delegated to self::substr()
        $prefix = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $prefix;
    }
2252
2253
    /**
     * Check if the number of Unicode characters isn't greater than the specified integer.
     *
     * EXAMPLE: <code>
     * UTF8::fits_inside('κόσμε', 6); // true  (5 characters fit into a box of 6)
     * UTF8::fits_inside('κόσμε', 4); // false (5 characters do not fit into a box of 4)
     * </code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        // the length as reported by self::strlen(), forced to an integer
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2270
2271
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement is a plain str_replace() of every key of the "utf8_fix" data table
     * with its value; the table is loaded once and then reused on later calls.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $search_cache = null;

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $replace_cache = null;

        // first call only: load the table (if needed) and remember its keys / values
        if ($search_cache === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $replace_cache = self::$BROKEN_UTF8_FIX;
            $search_cache = \array_keys(self::$BROKEN_UTF8_FIX ?: []);
        }

        \assert(\is_array($replace_cache));

        return \str_replace($search_cache, $replace_cache, $str);
    }
2321
2322
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
     *
     * Arrays are processed element by element (recursively, keys are kept). A string is
     * passed through "utf8_decode() -> to_utf8()" again and again until a round no longer
     * changes it.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>Will return the fixed input-"array" or
     *                         the fixed input-"string".</p>
     *
     * @template TFixUtf8
     * @phpstan-param TFixUtf8 $str
     * @phpstan-return TFixUtf8
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            foreach ($str as $key => $item) {
                $str[$key] = self::fix_utf8($item);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // repeat until a full decode/encode round leaves the string untouched
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::utf8_decode($current, true);
            $current = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2370
2371
    /**
     * Get the text direction of a specific character.
     *
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * If "IntlChar" support is flagged, its direction class is consulted first; a class that
     * is in neither of the two lists below falls through to the built-in code point tables.
     *
     * @param string $char
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            $intl_direction = \IntlChar::charDirection($char);

            // direction classes from the "IntlChar"-Class
            $ltr_classes = [0, 11, 12, 20];
            $rtl_classes = [1, 13, 14, 15, 21];

            if (\in_array($intl_direction, $ltr_classes, true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, $rtl_classes, true)) {
                return 'RTL';
            }
        }

        $c = static::chr_to_decimal($char);

        // everything outside of 0x5be - 0x10b7f is treated as left-to-right
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // single code points that are right-to-left (strict comparison)
        $rtl_code_points = [
            // ... up to 0x85e
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            // the only right-to-left code point between 0x85e and 0xfb1d
            0x200f,
            // ... from 0xfb1d onwards
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtl_code_points, true)) {
            return 'RTL';
        }

        // inclusive [first, last] ranges that are right-to-left
        $rtl_ranges = [
            // ... up to 0x85e
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            // ... from 0xfb1d onwards
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            // upper bound is the 0x10b7f limit already enforced above
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2488
2489
    /**
     * Check for php-support.
     *
     * Note: a lookup by $key also (re-)publishes the transliterator list under the
     * "intl__transliterator_list_ids" key of the support-array before the value is read.
     *
     * @param string|null $key
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // lazy-load the transliterator list on first use
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        return self::$SUPPORT;
    }
2515
2516
    /**
     * Guess the file type from the first two bytes of the given (binary) string.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // we need exactly two leading bytes; empty or one-byte input gets the fallback
        /** @var false|string $head - needed for PhpStan (stubs error) */
        $head = \substr($str, 0, 2);
        if ($head === false || \strlen($head) !== 2) {
            return $fallback;
        }

        $bytes = \unpack('C2chars', $head);
        if ($bytes === false) {
            return $fallback;
        }

        // the decimal values of both bytes, glued together (e.g. 137 and 80 => 13780)
        $type_code = (int) ($bytes['chars1'] . $bytes['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_types[$type_code] ?? $fallback;
    }
2592
2593
    /**
     * Build a string of $length characters, each one picked at random from $possible_chars.
     *
     * An empty character pool always yields an empty string; a $length of zero or less
     * yields an empty string as well.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // decided before the encoding gets normalized: "UTF-8" uses the plain mb_* functions,
        // everything else goes through self::strlen() / self::substr()
        $use_mb_functions = ($encoding === 'UTF-8');

        if ($use_mb_functions) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        if ($pool_size === 0) {
            return '';
        }

        //
        // add random chars
        //

        $result = '';
        for ($picked = 0; $picked < $length;) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                $index = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_mb_functions
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // only a successful pick counts towards the requested length
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2655
2656
    /**
     * Build a unique string from a random number, the session id, the remote / server
     * address (if set), the given extra entropy and uniqid().
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_part = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            $random_part = \mt_rand(0, \mt_getrandmax());
        }

        // collect everything that feeds into the identifier
        $seed = $random_part . \session_id();
        $seed .= ($_SERVER['REMOTE_ADDR'] ?? '');
        $seed .= ($_SERVER['SERVER_ADDR'] ?? '');
        $seed .= $extra_entropy;

        $unique = \uniqid($seed, true);

        return $use_md5 ? \md5($unique . $seed) : $unique;
    }
2684
2685
    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without mbstring support the check is delegated to self::str_matches_pattern()
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2703
2704
    /**
     * Returns true if the string contains whitespace, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        $pattern = '.*[[:space:]]';

        // without mbstring support the check is delegated to self::str_matches_pattern()
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2722
2723
    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without mbstring support the check is delegated to self::str_matches_pattern()
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2741
2742
    /**
     * Converts a hexadecimal value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * The value is converted with hexdec() and the resulting integer is handed
     * to UTF8::decimal_to_chr().
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2760
2761
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * Accepted input: 4 to 6 hexadecimal digits, optionally prefixed by "\u" or "U+".
     * Anything else (including non-hex letters such as "zzzz") is a failure.
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure.</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hex digits may be captured: with a wider class like [a-zA-Z0-9]
        // an input such as "zzzz" would match and intval(..., 16) would silently
        // turn it into 0 instead of reporting the failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2790
2791
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * With mbstring support the work is done by mb_encode_numericentity(); otherwise
     * (or if that call fails) every character is encoded via UTF8::single_chr_html_encode().
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // keeping ASCII means that the conversion only starts at 0x80
            $start_code = $keep_ascii_chars ? 0x80 : 0x00;
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
                $encoded = \mb_encode_numericentity($str, $convmap);
                if ($encoded !== null && $encoded !== false) {
                    return $encoded;
                }
            }

            /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
            $encoded = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($encoded !== null && $encoded !== false) {
                return $encoded;
            }
        }

        //
        // fallback via vanilla php
        //

        $encoded_chars = \array_map(
            static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
            },
            self::str_split($str)
        );

        return \implode('', $encoded_chars);
    }
2863
2864
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so numeric entities get their missing semicolon added here first.
     *
     * Convert all HTML entities to their applicable characters. The decoding is repeated
     * until the string no longer changes, so nested entities (e.g. "&amp;lt;") are
     * resolved completely.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string   $str      <p>
     *                           The input string.
     *                           </p>
     * @param int|null $flags    [optional] <p>
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
     *                           and which document type to use. If <b>NULL</b> is given, ENT_QUOTES | ENT_HTML5
     *                           is used.
     *                           <table>
     *                           Available <i>flags</i> constants
     *                           <tr valign="top">
     *                           <td>Constant Name</td>
     *                           <td>Description</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_COMPAT</b></td>
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_QUOTES</b></td>
     *                           <td>Will convert both double and single quotes.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_NOQUOTES</b></td>
     *                           <td>Will leave both double and single quotes unconverted.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_HTML401</b></td>
     *                           <td>Handle code as HTML 4.01.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_XML1</b></td>
     *                           <td>Handle code as XML 1.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_XHTML</b></td>
     *                           <td>Handle code as XHTML.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_HTML5</b></td>
     *                           <td>Handle code as HTML 5.</td>
     *                           </tr>
     *                           </table>
     *                           </p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // less than 4 bytes cannot hold an entity (examples: &; || &x;) and without "&" there is nothing to decode
        if (\strlen($str) < 4 || \strpos($str, '&') === false) {
            return $str;
        }

        $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        $handled_without_mbstring = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
        if (!$handled_without_mbstring && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // decode again and again, until a round leaves the string untouched
        for (;;) {
            $before = $str;

            // no "&" left => this round cannot change anything anymore
            if (\strpos($str, '&') === false) {
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);

            if ($before === $str) {
                break;
            }
        }

        return $str;
    }
2996
2997
    /**
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
     *
     * The flags ENT_QUOTES | ENT_SUBSTITUTE are always used.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $escape_flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escape_flags, $encoding);
    }
3016
    /**
3017
     * Remove empty html-tag.
3018
     *
3019
     * e.g.: <pre><tag></tag></pre>
3020
     *
3021
     * @param string $str
3022
     *
3023
     * @psalm-pure
3024
     *
3025
     * @return string
3026
     */
3027 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // Pattern: an opening tag, optional whitespace, then a closing tag, e.g. "<b> </b>".
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // preg_replace() returns null on failure (for example when $str is not
        // valid UTF-8 and the "u" modifier is active). Casting that null to a
        // string would silently erase the whole input, so the untouched input
        // is returned instead.
        return $stripped ?? $str;
    }
3035
3036
    /**
3037
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3038
     *
3039
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3040
     *
3041
     * @see http://php.net/manual/en/function.htmlentities.php
3042
     *
3043
     * @param string $str           <p>
3044
     *                              The input string.
3045
     *                              </p>
3046
     * @param int    $flags         [optional] <p>
3047
     *                              A bitmask of one or more of the following flags, which specify how to handle
3048
     *                              quotes, invalid code unit sequences and the used document type. The default is
3049
     *                              ENT_COMPAT | ENT_HTML401.
3050
     *                              <table>
3051
     *                              Available <i>flags</i> constants
3052
     *                              <tr valign="top">
3053
     *                              <td>Constant Name</td>
3054
     *                              <td>Description</td>
3055
     *                              </tr>
3056
     *                              <tr valign="top">
3057
     *                              <td><b>ENT_COMPAT</b></td>
3058
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3059
     *                              </tr>
3060
     *                              <tr valign="top">
3061
     *                              <td><b>ENT_QUOTES</b></td>
3062
     *                              <td>Will convert both double and single quotes.</td>
3063
     *                              </tr>
3064
     *                              <tr valign="top">
3065
     *                              <td><b>ENT_NOQUOTES</b></td>
3066
     *                              <td>Will leave both double and single quotes unconverted.</td>
3067
     *                              </tr>
3068
     *                              <tr valign="top">
3069
     *                              <td><b>ENT_IGNORE</b></td>
3070
     *                              <td>
3071
     *                              Silently discard invalid code unit sequences instead of returning
3072
     *                              an empty string. Using this flag is discouraged as it
3073
     *                              may have security implications.
3074
     *                              </td>
3075
     *                              </tr>
3076
     *                              <tr valign="top">
3077
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3078
     *                              <td>
3079
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3080
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
3081
     *                              string.
3082
     *                              </td>
3083
     *                              </tr>
3084
     *                              <tr valign="top">
3085
     *                              <td><b>ENT_DISALLOWED</b></td>
3086
     *                              <td>
3087
     *                              Replace invalid code points for the given document type with a
3088
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3089
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3090
     *                              instance, to ensure the well-formedness of XML documents with
3091
     *                              embedded external content.
3092
     *                              </td>
3093
     *                              </tr>
3094
     *                              <tr valign="top">
3095
     *                              <td><b>ENT_HTML401</b></td>
3096
     *                              <td>
3097
     *                              Handle code as HTML 4.01.
3098
     *                              </td>
3099
     *                              </tr>
3100
     *                              <tr valign="top">
3101
     *                              <td><b>ENT_XML1</b></td>
3102
     *                              <td>
3103
     *                              Handle code as XML 1.
3104
     *                              </td>
3105
     *                              </tr>
3106
     *                              <tr valign="top">
3107
     *                              <td><b>ENT_XHTML</b></td>
3108
     *                              <td>
3109
     *                              Handle code as XHTML.
3110
     *                              </td>
3111
     *                              </tr>
3112
     *                              <tr valign="top">
3113
     *                              <td><b>ENT_HTML5</b></td>
3114
     *                              <td>
3115
     *                              Handle code as HTML 5.
3116
     *                              </td>
3117
     *                              </tr>
3118
     *                              </table>
3119
     *                              </p>
3120
     * @param string $encoding      [optional] <p>
3121
     *                              Like <b>htmlspecialchars</b>,
3122
     *                              <b>htmlentities</b> takes an optional third argument
3123
     *                              <i>encoding</i> which defines encoding used in
3124
     *                              conversion.
3125
     *                              Although this argument is technically optional, you are highly
3126
     *                              encouraged to specify the correct value for your code.
3127
     *                              </p>
3128
     * @param bool   $double_encode [optional] <p>
3129
     *                              When <i>double_encode</i> is turned off PHP will not
3130
     *                              encode existing html entities. The default is to convert everything.
3131
     *                              </p>
3132
     *
3133
     * @psalm-pure
3134
     *
3135
     * @return string
3136
     *                <p>
3137
     *                The encoded string.
3138
     *                <br><br>
3139
     *                If the input <i>string</i> contains an invalid code unit
3140
     *                sequence within the given <i>encoding</i> an empty string
3141
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3142
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3143
     *                </p>
3144
     */
3145 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used exactly as given; any other name is normalized first.
        $use_as_given = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$use_as_given) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * PHP does not convert a backslash into its html entity, because the
         * backslash is mostly used for escaping when inserting into a database.
         * That is not needed here, so every backslash is replaced by its
         * numeric html entity.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // Final pass through html_encode(), called with its second argument set to true.
        return self::html_encode($encoded, true, $encoding);
    }
3174
3175
    /**
3176
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3177
     *
3178
     * INFO: Take a look at "UTF8::htmlentities()"
3179
     *
3180
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3181
     *
3182
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3183
     *
3184
     * @param string $str           <p>
3185
     *                              The string being converted.
3186
     *                              </p>
3187
     * @param int    $flags         [optional] <p>
3188
     *                              A bitmask of one or more of the following flags, which specify how to handle
3189
     *                              quotes, invalid code unit sequences and the used document type. The default is
3190
     *                              ENT_COMPAT | ENT_HTML401.
3191
     *                              <table>
3192
     *                              Available <i>flags</i> constants
3193
     *                              <tr valign="top">
3194
     *                              <td>Constant Name</td>
3195
     *                              <td>Description</td>
3196
     *                              </tr>
3197
     *                              <tr valign="top">
3198
     *                              <td><b>ENT_COMPAT</b></td>
3199
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3200
     *                              </tr>
3201
     *                              <tr valign="top">
3202
     *                              <td><b>ENT_QUOTES</b></td>
3203
     *                              <td>Will convert both double and single quotes.</td>
3204
     *                              </tr>
3205
     *                              <tr valign="top">
3206
     *                              <td><b>ENT_NOQUOTES</b></td>
3207
     *                              <td>Will leave both double and single quotes unconverted.</td>
3208
     *                              </tr>
3209
     *                              <tr valign="top">
3210
     *                              <td><b>ENT_IGNORE</b></td>
3211
     *                              <td>
3212
     *                              Silently discard invalid code unit sequences instead of returning
3213
     *                              an empty string. Using this flag is discouraged as it
3214
     *                              may have security implications.
3215
     *                              </td>
3216
     *                              </tr>
3217
     *                              <tr valign="top">
3218
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3219
     *                              <td>
3220
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3221
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
3222
     *                              string.
3223
     *                              </td>
3224
     *                              </tr>
3225
     *                              <tr valign="top">
3226
     *                              <td><b>ENT_DISALLOWED</b></td>
3227
     *                              <td>
3228
     *                              Replace invalid code points for the given document type with a
3229
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3230
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3231
     *                              instance, to ensure the well-formedness of XML documents with
3232
     *                              embedded external content.
3233
     *                              </td>
3234
     *                              </tr>
3235
     *                              <tr valign="top">
3236
     *                              <td><b>ENT_HTML401</b></td>
3237
     *                              <td>
3238
     *                              Handle code as HTML 4.01.
3239
     *                              </td>
3240
     *                              </tr>
3241
     *                              <tr valign="top">
3242
     *                              <td><b>ENT_XML1</b></td>
3243
     *                              <td>
3244
     *                              Handle code as XML 1.
3245
     *                              </td>
3246
     *                              </tr>
3247
     *                              <tr valign="top">
3248
     *                              <td><b>ENT_XHTML</b></td>
3249
     *                              <td>
3250
     *                              Handle code as XHTML.
3251
     *                              </td>
3252
     *                              </tr>
3253
     *                              <tr valign="top">
3254
     *                              <td><b>ENT_HTML5</b></td>
3255
     *                              <td>
3256
     *                              Handle code as HTML 5.
3257
     *                              </td>
3258
     *                              </tr>
3259
     *                              </table>
3260
     *                              </p>
3261
     * @param string $encoding      [optional] <p>
3262
     *                              Defines encoding used in conversion.
3263
     *                              </p>
3264
     *                              <p>
3265
     *                              For the purposes of this function, the encodings
3266
     *                              ISO-8859-1, ISO-8859-15,
3267
     *                              UTF-8, cp866,
3268
     *                              cp1251, cp1252, and
3269
     *                              KOI8-R are effectively equivalent, provided the
3270
     *                              <i>string</i> itself is valid for the encoding, as
3271
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3272
     *                              the same positions in all of these encodings.
3273
     *                              </p>
3274
     * @param bool   $double_encode [optional] <p>
3275
     *                              When <i>double_encode</i> is turned off PHP will not
3276
     *                              encode existing html entities, the default is to convert everything.
3277
     *                              </p>
3278
     *
3279
     * @psalm-pure
3280
     *
3281
     * @return string the converted string.
3282
     *                </p>
3283
     *                <p>
3284
     *                If the input <i>string</i> contains an invalid code unit
3285
     *                sequence within the given <i>encoding</i> an empty string
3286
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3287
     *                <b>ENT_SUBSTITUTE</b> flags are set
3288
     */
3289 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used exactly as given; any other name is normalized first.
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
3306
3307
    /**
3308
     * Checks whether iconv is available on the server.
3309
     *
3310
     * @psalm-pure
3311
     *
3312
     * @return bool
3313
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3314
     *
3315
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
3316
     */
3317
    public static function iconv_loaded(): bool
    {
        // Ask the engine directly whether the "iconv" extension is loaded.
        $has_iconv = \extension_loaded('iconv');

        return $has_iconv;
    }
3321
3322
    /**
3323
     * Converts Integer to hexadecimal U+xxxx code point representation.
3324
     *
3325
     * INFO: opposite to UTF8::hex_to_int()
3326
     *
3327
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3328
     *
3329
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3330
     * @param string $prefix [optional]
3331
     *
3332
     * @psalm-pure
3333
     *
3334
     * @return string the prefixed hexadecimal code point, left-padded with zeros to at least four digits
3335
     */
3336 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        $digits = \dechex($int);

        // Short values are left-padded with zeros so that at least four hex digits are emitted.
        if (\strlen($digits) < 4) {
            $digits = \substr('0000' . $digits, -4);
        }

        return $prefix . $digits;
    }
3344
3345
    /**
3346
     * Checks whether intl-char is available on the server.
3347
     *
3348
     * @psalm-pure
3349
     *
3350
     * @return bool
3351
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3352
     *
3353
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
3354
     */
3355
    public static function intlChar_loaded(): bool
    {
        // Availability is detected via the presence of the "IntlChar" class.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
3359
3360
    /**
3361
     * Checks whether intl is available on the server.
3362
     *
3363
     * @psalm-pure
3364
     *
3365
     * @return bool
3366
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3367
     *
3368
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
3369
     */
3370 5
    public static function intl_loaded(): bool
    {
        // Ask the engine directly whether the "intl" extension is loaded.
        $has_intl = \extension_loaded('intl');

        return $has_intl;
    }
3374
3375
    /**
3376
     * Returns true if the string contains only alphabetic chars, false otherwise.
3377
     *
3378
     * @param string $str <p>The input string.</p>
3379
     *
3380
     * @psalm-pure
3381
     *
3382
     * @return bool
3383
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3384
     */
3385 10
    public static function is_alpha(string $str): bool
    {
        $only_alpha_pattern = '^[[:alpha:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $only_alpha_pattern);
        }

        return \mb_ereg_match($only_alpha_pattern, $str);
    }
3393
3394
    /**
3395
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3396
     *
3397
     * @param string $str <p>The input string.</p>
3398
     *
3399
     * @psalm-pure
3400
     *
3401
     * @return bool
3402
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3403
     */
3404 13
    public static function is_alphanumeric(string $str): bool
    {
        $only_alnum_pattern = '^[[:alnum:]]*$';

        // Prefer mbstring when it is flagged as available, otherwise use the class' own matcher.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($only_alnum_pattern, $str)
            : self::str_matches_pattern($str, $only_alnum_pattern);
    }
3412
3413
    /**
3414
     * Returns true if the string contains only punctuation chars, false otherwise.
3415
     *
3416
     * @param string $str <p>The input string.</p>
3417
     *
3418
     * @psalm-pure
3419
     *
3420
     * @return bool
3421
     *              <p>Whether or not $str contains only punctuation chars.</p>
3422
     */
3423 10
    public static function is_punctuation(string $str): bool
    {
        // Zero or more punctuation characters and nothing else.
        $only_punct_pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $only_punct_pattern);
    }
3427
3428
    /**
3429
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3430
     *
3431
     * @param string $str                       <p>The input string.</p>
3432
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
3433
     *
3434
     * @psalm-pure
3435
     *
3436
     * @return bool
3437
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3438
     */
3439 1
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // The string is printable when removing invisible characters changes nothing.
        $without_invisible = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $without_invisible === $str;
    }
3443
3444
    /**
3445
     * Checks if a string is 7 bit ASCII.
3446
     *
3447
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3448
     *
3449
     * @param string $str <p>The string to check.</p>
3450
     *
3451
     * @psalm-pure
3452
     *
3453
     * @return bool
3454
     *              <p>
3455
     *              <strong>true</strong> if it is ASCII<br>
3456
     *              <strong>false</strong> otherwise
3457
     *              </p>
3458
     */
3459 8
    public static function is_ascii(string $str): bool
    {
        // Thin wrapper: the actual check lives in the ASCII helper class.
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3463
3464
    /**
3465
     * Returns true if the string is base64 encoded, false otherwise.
3466
     *
3467
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3468
     *
3469
     * @param string|null $str                   <p>The input string.</p>
3470
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3471
     *
3472
     * @psalm-pure
3473
     *
3474
     * @return bool
3475
     *              <p>Whether or not $str is base64 encoded.</p>
3476
     */
3477 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // An empty string is rejected up front unless the caller explicitly allows it.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // null (or any other non-string value) can never be base64.
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Only accept input that survives a decode/encode round trip unchanged.
        return \base64_encode($decoded) === $str;
    }
3495
3496
    /**
3497
     * Check if the input is binary... (this looks like a hack).
3498
     *
3499
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3500
     *
3501
     * @param int|string $input
3502
     * @param bool       $strict
3503
     *
3504
     * @psalm-pure
3505
     *
3506
     * @return bool
3507
     */
3508 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        // A string made up solely of "0" and "1" characters counts as binary.
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        $file_type = self::get_file_type($data);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // More than a quarter of the bytes being NUL also counts as binary.
        $byte_count = \strlen($data);
        $null_byte_count = \substr_count($data, "\x0", 0, $byte_count);
        if (($null_byte_count / $byte_count) > 0.25) {
            return true;
        }

        // The fileinfo based check below only runs in strict mode.
        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        return $detected_encoding === 'binary';
    }
3546
3547
    /**
3548
     * Check if the file is binary.
3549
     *
3550
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3551
     *
3552
     * @param string $file
3553
     *
3554
     * @return bool
3555
     */
3556 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // The file could not be opened, so there is nothing to inspect.
            return false;
        }

        // Only the first 512 bytes are inspected.
        $chunk = \fread($handle, 512);
        \fclose($handle);

        if ($chunk === false || $chunk === '') {
            return false;
        }

        return self::is_binary($chunk, true);
    }
3573
3574
    /**
3575
     * Returns true if the string contains only whitespace chars, false otherwise.
3576
     *
3577
     * @param string $str <p>The input string.</p>
3578
     *
3579
     * @psalm-pure
3580
     *
3581
     * @return bool
3582
     *              <p>Whether or not $str contains only whitespace characters.</p>
3583
     */
3584 15
    public static function is_blank(string $str): bool
    {
        $only_space_pattern = '^[[:space:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $only_space_pattern);
        }

        return \mb_ereg_match($only_space_pattern, $str);
    }
3592
3593
    /**
3594
     * Checks if the given string is equal to any "Byte Order Mark".
3595
     *
3596
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3597
     *
3598
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3599
     *
3600
     * @param string $str <p>The input string.</p>
3601
     *
3602
     * @psalm-pure
3603
     *
3604
     * @return bool
3605
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3606
     */
3607 2
    public static function is_bom($str): bool
    {
        // Iterate by value: only the keys (the BOM byte strings) are needed.
        // The former by-reference loop over the shared static array left a
        // dangling reference behind for a purely read-only lookup.
        /** @noinspection PhpUnusedLocalVariableInspection */
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // Strict comparison: non-string input never equals a BOM key.
            if ($str === $bom_string) {
                return true;
            }
        }

        return false;
    }
3618
3619
    /**
3620
     * Determine whether the string is considered to be empty.
3621
     *
3622
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3623
     * empty() does not generate a warning if the variable does not exist.
3624
     *
3625
     * @param array|float|int|string $str
3626
     *
3627
     * @psalm-pure
3628
     *
3629
     * @return bool
3630
     *              <p>Whether or not $str is empty().</p>
3631
     */
3632 1
    public static function is_empty($str): bool
    {
        // For a variable that is always defined (a parameter), empty() is the
        // same as negating its boolean value.
        return !$str;
    }
3636
3637
    /**
3638
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3639
     *
3640
     * @param string $str <p>The input string.</p>
3641
     *
3642
     * @psalm-pure
3643
     *
3644
     * @return bool
3645
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3646
     */
3647 13
    public static function is_hexadecimal(string $str): bool
    {
        $only_xdigit_pattern = '^[[:xdigit:]]*$';

        // Prefer mbstring when it is flagged as available, otherwise use the class' own matcher.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($only_xdigit_pattern, $str)
            : self::str_matches_pattern($str, $only_xdigit_pattern);
    }
3655
3656
    /**
3657
     * Check if the string contains any HTML tags.
3658
     *
3659
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3660
     *
3661
     * @param string $str <p>The input string.</p>
3662
     *
3663
     * @psalm-pure
3664
     *
3665
     * @return bool
3666
     *              <p>Whether or not $str contains html elements.</p>
3667
     */
3668 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $subject = self::emoji_encode($str);

        // Looks for one opening, closing or self-closing tag, optionally carrying attributes.
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        // preg_match() returns 1 only when a tag was found; 0 (no match) and false (error) both mean "no html".
        return \preg_match($tag_pattern, $subject) === 1;
    }
3683
3684
    /**
3685
     * Check if $url is a correct URL.
3686
     *
3687
     * @param string $url
3688
     * @param bool   $disallow_localhost
3689
     *
3690
     * @psalm-pure
3691
     *
3692
     * @return bool
3693
     */
3694 1
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // Inside a URL an IPv6 literal is written in square brackets, so the
                    // loopback address must be rejected in that form as well.
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            // Reject anything that contains ".localhost" after the scheme.
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Everything else is left to PHP's built-in URL validation.
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3735
3736
    /**
3737
     * Try to check if "$str" is a JSON-string.
3738
     *
3739
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
3740
     *
3741
     * @param string $str                                    <p>The input string.</p>
3742
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
3743
     *                                                       results.</p>
3744
     *
3745
     * @return bool
3746
     *              <p>Whether or not the $str is in JSON format.</p>
3747
     */
3748 42
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only tolerated when the input itself spells "null" (in any letter case).
        $is_null_literal = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$is_null_literal) {
            return false;
        }

        // Optionally accept only decoded arrays and objects.
        if ($only_array_or_object_results_are_valid) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        // Final verdict: the decoder must not have recorded an error.
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3775
3776
    /**
3777
     * @param string $str <p>The input string.</p>
3778
     *
3779
     * @psalm-pure
3780
     *
3781
     * @return bool
3782
     *              <p>Whether or not $str contains only lowercase chars.</p>
3783
     */
3784 8
    public static function is_lowercase(string $str): bool
    {
        $only_lower_pattern = '^[[:lower:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $only_lower_pattern);
        }

        return \mb_ereg_match($only_lower_pattern, $str);
    }
3792
3793
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check works by attempting to unserialize the input. Object
     * instantiation is disabled while doing so, so serialized objects are
     * still recognized but no user-defined class is ever constructed.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized form of boolean false; unserialize() would return
        // false for it, which is indistinguishable from its failure value.
        if ($str === 'b:0;') {
            return true;
        }

        // 'allowed_classes' => false: objects are turned into __PHP_Incomplete_Class,
        // so probing untrusted input cannot trigger magic methods of real classes.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3815
3816
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * An empty string also matches, because the pattern allows zero characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // No mbstring flagged: fall back to the library's own pattern matcher.
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3835
3836
    /**
     * Check if the string is UTF-16.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>When true, the input is first passed to
     *                                          is_binary() and rejected if that check fails.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Result of count_chars() for the raw input; computed lazily and at most once.
        $str_chars = null;

        // Score per candidate byte order; the order of the keys is the order of evaluation.
        $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (\array_keys($score) as $candidate) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            // Only score the candidate if UTF-8 -> candidate -> UTF-8 round-trips unchanged.
            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === null) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Iterate the keys by value: the array is a temporary returned by a call,
            // so a by-reference loop has nothing to bind to and nothing is modified.
            // NOTE(review): keys of count_chars($test3) are compared strictly against
            // the *values* of count_chars($str, ...) - confirm this is the intended check.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$score[$candidate];
                }
            }
        }

        if ($score['UTF-16BE'] === $score['UTF-16LE']) {
            // A tie (including "no candidate matched") is treated as "not UTF-16".
            return false;
        }

        return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
    }
3929
3930
    /**
     * Check if the string is UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>When true, the input is first passed to
     *                                          is_binary() and rejected if that check fails.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Result of count_chars() for the raw input; computed lazily and at most once.
        $str_chars = null;

        // Score per candidate byte order; the order of the keys is the order of evaluation.
        $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (\array_keys($score) as $candidate) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            // Only score the candidate if UTF-8 -> candidate -> UTF-8 round-trips unchanged.
            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === null) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Iterate the keys by value: the array is a temporary returned by a call,
            // so a by-reference loop has nothing to bind to and nothing is modified.
            // NOTE(review): keys of count_chars($test3) are compared strictly against
            // the *values* of count_chars($str, ...) - confirm this is the intended check.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$score[$candidate];
                }
            }
        }

        if ($score['UTF-32BE'] === $score['UTF-32LE']) {
            // A tie (including "no candidate matched") is treated as "not UTF-32".
            return false;
        }

        return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
    }
4023
4024
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked. Arrays are checked element by
     *                                         element (recursively); an empty array yields true.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (!\is_array($str)) {
            // Scalars and null are cast to string before the actual check.
            return self::is_utf8_string((string) $str, $strict);
        }

        // Iterate by value: nothing is modified, so no reference is needed
        // (and none is left dangling after the loop).
        foreach ($str as $value) {
            if (!self::is_utf8($value, $strict)) {
                return false;
            }
        }

        return true;
    }
4054
4055
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Decodes a JSON string
     *
     * The input is passed through UTF8::filter() before it is handed to the
     * native decoder.
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options. Currently only
     *                        <b>JSON_BIGINT_AS_STRING</b>
     *                        is supported (default is to cast large integers as floats)
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException
     *                           <p>If the JSON extension is flagged as unavailable.</p>
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // The input is filtered first, exactly as before, even if decoding is impossible afterwards.
        $filtered_json = self::filter($json);

        if (false === self::$SUPPORT['json']) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = \json_decode($filtered_json, $assoc, $depth, $options);

        return $decoded;
    }
4108
4109
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Returns the JSON representation of a value.
     *
     * The value is passed through UTF8::filter() before it is handed to the
     * native encoder.
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException
     *                           <p>If the JSON extension is flagged as unavailable.</p>
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // The value is filtered first, exactly as before, even if encoding is impossible afterwards.
        $filtered_value = self::filter($value);

        if (false === self::$SUPPORT['json']) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $encoded = \json_encode($filtered_value, $options, $depth);

        return $encoded;
    }
4161
4162
    /**
     * Checks whether JSON is available on the server.
     *
     * Availability is detected by probing for the global json_decode() function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $json_is_available = \function_exists('json_decode');

        return $json_is_available;
    }
4176
4177
    /**
     * Makes string's first char lowercase.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Any encoding other than the literal 'UTF-8' goes through the library's own helpers.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $remainder = (string) self::substr($str, 1, null, $encoding);
            $lowered_first = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $lowered_first . $remainder;
        }

        $remainder = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // The plain mb_* call is only used when no language-specific or
        // length-preserving handling was requested.
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($first_char) . $remainder;
        }

        $lowered_first = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $lowered_first . $remainder;
    }
4239
4240
    /**
     * Lowercase for all words in the string.
     *
     * The string is split via UTF8::str_to_words(), the first character of every
     * non-excluded part is lowercased via UTF8::lcfirst(), and the parts are
     * concatenated again in their original order.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare against the empty string explicitly: a loose falsy check would
        // also discard the perfectly valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Iterate by value: the parts are only read, so no reference is needed
        // (and none is left dangling after the loop).
        foreach ($words as $word) {
            // Skip empty parts only; the part "0" must survive.
            if ((string) $word === '') {
                continue;
            }

            if (
                $use_exceptions
                &&
                \in_array($word, $exceptions, true)
            ) {
                // Excluded words are appended untouched.
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }
4293
4294
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; when null, leading whitespace
     *                           (regex class "\s") is stripped.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // mb_ereg patterns have no delimiter, so none is passed to preg_quote().
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter form is deprecated since PHP 8.2.
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            // The fallback quotes "/" as well, as a PCRE-style delimiter.
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }
4333
4334
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings
     *                                  (array elements are concatenated before the lookup).</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);

        // No code points means there is nothing to pick from.
        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4360
4361
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte lengths of the given chars, or 0 when no sizes are available.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        // \max() must not be called with an empty array, hence the explicit guard.
        return $sizes === [] ? 0 : (int) \max($sizes);
    }
4383
4384
    /**
     * Checks whether mbstring is available on the server.
     *
     * Availability is detected by asking PHP whether the "mbstring" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $mbstring_is_available = \extension_loaded('mbstring');

        return $mbstring_is_available;
    }
4398
4399
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings
     *                             (array elements are concatenated before the lookup).</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
     */
    public static function min($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input);

        // No code points means there is nothing to pick from.
        return $codepoints === []
            ? null
            : self::chr((int) \min($codepoints));
    }
4426
4427
    /**
4428
     * Normalize the encoding-"name" input.
4429
     *
4430
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
4431
     *
4432
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
4433
     * @param mixed $fallback <p>e.g.: UTF-8</p>
4434
     *
4435
     * @psalm-pure
4436
     *
4437
     * @return mixed|string
4438
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)</p>
4439
     *
4440
     * @template TNormalizeEncodingFallback
4441
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
4442
     * @phpstan-return string|TNormalizeEncodingFallback
4443
     */
4444
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * Memo of already resolved names (name as given by the caller => normalized name).
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // Nothing usable was given: '' plus the stringified booleans/ints '0' and '1'
        // (only a fallback, for non "strict_types" usage ...).
        if ($encoding === '' || $encoding === '0' || $encoding === '1') {
            return $fallback;
        }

        // The most frequent spellings are answered directly, without touching the cache or the encoding list.
        $shortcuts = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($shortcuts[$encoding])) {
            return $shortcuts[$encoding];
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // The list of known encoding names is loaded lazily, on first use.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // An exact (case-sensitive) hit in the known list is returned unchanged.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // Otherwise: upper-case the name and look up its alphanumeric-only form in the alias table.
        $normalized = \strtoupper($encoding);
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $normalized);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Unknown aliases fall through as the upper-cased input.
        $normalized = $equivalences[$alias_key] ?? $normalized;

        // Cached under the name exactly as the caller spelled it.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4592
4593
    /**
     * Standardize the line endings "\r\n", "\r" and "\n" of a string (unix-like "\n" by default).
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. If an array is given, its 1st, 2nd and 3rd value replace "\r\n", "\r"
     *                                  and "\n" respectively; a missing value removes that line ending.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if (\is_array($replacer)) {
            // positional mapping, independent of the array keys
            $replacements = \array_values($replacer);
            $map = [
                "\r\n" => (string) ($replacements[0] ?? ''),
                "\r"   => (string) ($replacements[1] ?? ''),
                "\n"   => (string) ($replacements[2] ?? ''),
            ];
        } else {
            $replacement = (string) $replacer;
            $map = [
                "\r\n" => $replacement,
                "\r"   => $replacement,
                "\n"   => $replacement,
            ];
        }

        // "strtr" walks the input once, prefers the longest key ("\r\n" before "\r") and never
        // re-scans text it has just inserted. A sequential "str_replace" would replace the "\r" and
        // "\n" of a freshly inserted "\r\n" again and produce "\r\r\n\r\n".
        return \strtr($str, $map);
    }
4609
4610
    /**
     * Replace some special characters that are typical for MS Word documents.
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        // pure delegation, the work is done by the ASCII helper class
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4626
4627
    /**
     * Normalize the whitespace of a string.
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                          <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
     *                                             bidirectional text chars.</p>
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        // pure delegation, all arguments are handed to the ASCII helper class unchanged
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }
4656
4657
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * Memo of already computed code points, keyed by "<input>_<encoding>".
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the key is built from the input as given, i.e. before any re-encoding below
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // the lookup table is loaded lazily, on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $code = \IntlChar::ord($chr);
            // "IntlChar::ord" yields int|null: compare against null explicitly,
            // so that a legitimate code point 0 is not mistaken for a failure
            if ($code !== null) {
                return $CHAR_CACHE[$cache_key] = $code;
            }
        }

        //
        // fallback via vanilla php
        //

        // at most 4 bytes are looked at; "unpack('C*')" returns a 1-based list of byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $code = $bytes ? $bytes[1] : 0;

        // 4-byte sequence (lead byte >= 0xF0)
        if ($code >= 0xF0 && isset($bytes[4])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        }

        // 3-byte sequence (lead byte >= 0xE0)
        if ($code >= 0xE0 && isset($bytes[3])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        }

        // 2-byte sequence (lead byte >= 0xC0)
        if ($code >= 0xC0 && isset($bytes[2])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xC0) << 6) + $bytes[2] - 0x80);
        }

        // single byte (or empty input => 0)
        return $CHAR_CACHE[$cache_key] = $code;
    }
4742
4743
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // without "mbstring" the native parser is used
        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        // success means: the parser did not fail AND it produced at least one entry
        return $parsed !== false && $result !== [];
    }
4784
4785
    /**
     * Checks if the "/u" pattern modifier, which enables Unicode support in PCRE, is available.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        // An empty pattern with the "u" modifier is matched against an empty subject;
        // warnings are silenced on purpose, only the result of the match is of interest.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
4801
4802
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $step is not a number or not a positive number
     * @throws \RuntimeException         if $use_ctype is requested but "ext-ctype" is not available
     *
     * @return string[]
     *                  <p>The characters of the range, an empty array if one of the boundaries is empty
     *                  or resolves to the code point 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // The ctype functions interpret small integer arguments as the ASCII value of one character
        // (deprecated since PHP 8.1), so the boundaries are always tested in their string form.
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        // both boundaries must be of the same kind (decimal or hexadecimal) to be treated as numbers
        $is_digit = $use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string);
        $is_xdigit = !$is_digit && $use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string);

        if ($is_digit) {
            $start = (int) $var1;
        } elseif ($is_xdigit) {
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // not a number: use the code point of the given character
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4895
4896
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass is always done; with $multi_decode the pass is repeated
        // until it no longer changes the string.
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }
4956
4957
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always used together with the "u" (UTF-8) modifier.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used, appended as pattern modifiers.
     *                            The value 'msr' is used as 'ms'.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty (falsy) delimiter
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        // "preg_replace" returns null on error, which is turned into an empty string here
        return (string) \preg_replace($regex, $replacement, $str);
    }
4992
4993
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);

        // every known BOM is tested in turn against the (possibly already shortened) string
        foreach (self::$BOM as $bom => $bom_length) {
            if (\strncmp($str, $bom, $bom_length) !== 0) {
                continue;
            }

            /** @var false|string $without_bom - needed for PhpStan (stubs error) */
            $without_bom = \substr($str, $bom_length, $remaining_bytes);
            if ($without_bom === false) {
                return '';
            }

            $remaining_bytes -= $bom_length;
            $str = (string) $without_bom;
        }

        return $str;
    }
5028
5029
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what (e.g. "abab" for "ab") is collapsed to a single $what.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        // anything that is neither a string nor an array leaves the input untouched
        if (!\is_array($what)) {
            return $str;
        }

        foreach ($what as $item) {
            $item = (string) $item;

            // The replacement is supplied by a callback so that it is inserted literally;
            // as a plain replacement string, "$0", "\0", "$1" ... would be expanded as back-references.
            $str = (string) \preg_replace_callback(
                '/(' . \preg_quote($item, '/') . ')+/u',
                static function () use ($item): string {
                    return $item;
                },
                $str
            );
        }

        return $str;
    }
5059
5060
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
     *                               should not be stripped. Default: '' (empty string)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $without_tags = \strip_tags($str, $allowable_tags);

        return $without_tags;
    }
5077
5078
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * "\r\n" counts as ONE break, so it is replaced by a single $replacement.
     * Only real "br" tags are matched (e.g. "<br>", "<br/>", "<BR />", "<br class='x'>"),
     * other tags which merely start with "br" are kept.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // - "\r\n" is listed first, so that a Windows line ending is consumed as one unit
        // - "\b" after "br" prevents matches like "<broken>"
        // - "[^>]*" consumes optional attributes / the closing slash up to the end of the tag
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
5093
5094
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        // pure delegation, all arguments are handed to the ASCII helper class unchanged
        $visible_only = ASCII::remove_invisible_characters(
            $str,
            $url_encoded,
            $replacement,
            $keep_basic_control_characters
        );

        return $visible_only;
    }
5131
5132
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix is detected with a byte-wise comparison; an empty $substring never matches.
     *
     * EXAMPLE: <code>UTF8::remove_left('κόσμε-abc', 'κόσμε'); // '-abc'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // explicit comparison with '': a plain truthiness check would also skip the prefix "0"
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5173
5174
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix is detected with a byte-wise comparison; an empty $substring never matches.
     *
     * EXAMPLE: <code>UTF8::remove_right('abc-κόσμε', 'κόσμε'); // 'abc-'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // explicit comparison with '': a plain truthiness check would also skip the suffix "0"
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5212
5213
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // case-sensitive: native "str_replace", otherwise the case-insensitive helper of this class
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5238
5239
    /**
     * Replaces all occurrences of every element of $search in $str by $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // case-sensitive: native "str_replace", otherwise the case-insensitive helper of this class
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5264
5265
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character (may be empty, multi-byte
     *                                           or longer than one character).</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // Invalid sequences are first turned into U+FFFD (the diamond question mark itself),
            // the final "str_replace" then swaps every U+FFFD for the replacement. This way the
            // replacement is inserted as-is: a byte-wise "ord()" of it would give a wrong code point
            // for multi-byte characters and would only honor its first character.

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();
            \mb_substitute_character(0xFFFD);

            try {
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // the substitute character is a global setting: always restore it
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
5324
5325
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped, null === strip whitespace.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // INFO: "{$var}" instead of the deprecated (PHP >= 8.2) "${var}" interpolation
                $pattern = "[{$chars}]+$";
            } else {
                $pattern = '[\\s]+$';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without "mbstring": the pattern is handed over to the regex helper,
        // so the "/" is quoted as well
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
5365
5366
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the
     * internal support-array and returns it; the block is additionally
     * echoed when $useEcho is true.
     *
     * @param bool $useEcho <p>true === also echo the generated HTML.</p>
     *
     * @psalm-pure
     *
     * @return string|void
     *                     <p>The generated HTML (a string is returned in every case).</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        // init
        $html = '<pre>';

        // iterate by value: the entries are only read, so no reference is needed
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5392
5393
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to encode
        if ($char === '') {
            return '';
        }

        // the ASCII check is only evaluated if the caller wants to keep ASCII chars
        $return_unchanged = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($return_unchanged) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5426
5427
    /**
     * Replaces every run of $tab_length spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that are replaced by one tab.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5447
5448
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the native "mb_" functions are only used if no language- or length-handling is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-case one matched chunk, either via "mb_strtoupper()" or via the class own helper.
         *
         * @param string $chunk
         * @param bool   $clean_chunk
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $chunk, bool $clean_chunk) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($chunk, $encoding, $clean_chunk, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($chunk);
            }

            return \mb_strtoupper($chunk, $encoding);
        };

        // 1.) lower-case the first char of the trimmed string & drop leading dashes / underscores
        $str = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // 2.) remove the separators and upper-case the char that follows them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                if (!isset($match[1])) {
                    return '';
                }

                return $to_upper($match[1], false);
            },
            $str
        );

        // 3.) upper-case the char that follows a run of numbers
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5541
5542
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        // collapse the whitespace first, then run the helper once per delimiter: ' ' and '-'
        $collapsed = self::collapse_whitespace($str);
        $space_separated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_separated, '-');
    }
5563
5564
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // case-insensitive search
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        // case-sensitive search for PHP < 8
        if (\PHP_VERSION_ID < 80000) {
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
5594
5595
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * INFO: An empty $haystack, an empty $needles array or an empty needle
     *       always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains all $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // "0" is a valid needle, every other falsy value counts as "empty"
            if (!$needle && $needle !== '0') {
                return false;
            }

            // run exactly one search per needle, depending on the requested mode
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5632
5633
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * INFO: Empty needles are skipped; an empty $haystack or an empty $needles
     *       array always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains any of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // "0" is a valid needle, every other falsy value counts as "empty" and is skipped
            if (!$needle && $needle !== '0') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5676
5677
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * INFO: This is a shortcut for "UTF8::str_delimit()" with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dashed = self::str_delimit($str, '-', $encoding);

        return $dashed;
    }
5693
5694
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // "mb_ereg_replace()" is used with native "mbstring" support, otherwise "preg_replace()"
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // 1.) mark every uppercase letter (not at a word-start) with a leading dash
        if ($has_mbstring) {
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // 2.) lower-case the string, via the native function in the simple UTF-8 case
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        $str = ($use_mb_functions && $encoding === 'UTF-8')
            ? \mb_strtolower($str)
            : self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        // 3.) replace every run of dashes, underscores & whitespace with the delimiter
        if ($has_mbstring) {
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5745
5746
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true)) {
            // result "1" is mapped to little-endian, result "2" to big-endian
            $utf32_result = self::is_utf32($str, false);
            if ($utf32_result === 1 || $utf32_result === 2) {
                return $utf32_result === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            $utf16_result = self::is_utf16($str, false);
            if ($utf16_result === 1 || $utf16_result === 2) {
                return $utf16_result === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $encoding_detecting_order = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $encoding_detecting_order, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);

            // the candidate is accepted if the string survives the round-trip unchanged
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5876
5877
    /**
     * Check if the string ends with the given substring.
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // byte-wise comparison of the tail for PHP < 8
        if (\PHP_VERSION_ID < 80000) {
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
5909
5910
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty substring always matches (same rule as in "UTF8::str_ends_with()")
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $substring) {
            // INFO: "substr($str, -0)" would return the whole string, so the empty
            //       substring needs its own check
            if (
                $substring === ''
                ||
                \substr($str, -\strlen($substring)) === $substring
            ) {
                return true;
            }
        }

        return false;
    }
5937
5938
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // byte-wise prefix check, an empty $substring is simply prepended (no-op)
        $already_prefixed = $substring !== ''
                            &&
                            \strncmp($str, $substring, \strlen($substring)) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
5961
5962
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // keep the string as it is, if it already ends (byte-wise) with the substring
        if (
            $str !== ''
            &&
            $substring !== ''
            &&
            \substr($str, -\strlen($substring)) === $substring
        ) {
            return $str;
        }

        return $str . $substring;
    }
5986
5987
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // the order matters: '_id' must be removed before the remaining underscores are replaced
        $without_id = \str_replace('_id', '', $str);
        $with_spaces = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($with_spaces));
    }
6013
6014
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // cut off as many bytes as the needle has & compare them case-insensitive
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6041
6042
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // stop at the first substring that matches the end of the string
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6069
6070
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * INFO: The string is returned unchanged if $index is greater than its length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // other encodings are normalized and handled by the class own helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // fast path for "UTF-8" via the native "mb_" functions
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        /** @noinspection UnnecessaryCastingInspection */
        $head = (string) \mb_substr($str, 0, $index);
        /** @noinspection UnnecessaryCastingInspection */
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
6111
6112
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * INFO: The replacement is always used literally, so e.g. "$1" or "\1" are not
     *       handled as regex back-references.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements.</p>
     *
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        $search = (array) $search;

        // convert every search-string into a quoted, case-insensitive unicode regex
        /** @noinspection AlterInForeachInspection */
        foreach ($search as &$s) {
            $s = (string) $s;
            if ($s === '') {
                // an empty search-string must never match
                $s = '/^(?<=.)$/';
            } else {
                $s = '/' . \preg_quote($s, '/') . '/ui';
            }
        }
        // break the reference, so that "$s" can't modify the last array element anymore
        unset($s);

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // escape "\" and "$", otherwise "preg_replace()" would interpret
        // sequences like "$1", "${1}" or "\1" in the replacement as back-references
        if (\is_array($replacement)) {
            foreach ($replacement as $replacement_key => $replacement_value) {
                $replacement[$replacement_key] = \addcslashes((string) $replacement_value, '\\$');
            }
        } else {
            $replacement = \addcslashes((string) $replacement, '\\$');
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($search, $replacement, $subject, -1, $count);

        return $subject;
    }
6179
6180
    /**
     * Replaces $search at the beginning of the string with $replacement (case-insensitive).
     *
     * The comparison is done per character (multibyte aware), so e.g. "κόσμε"
     * matches a string starting with "Κόσμε".
     *
     * Special cases:
     * - empty $str and empty $replacement: '' is returned
     * - empty $str and empty $search: $replacement is returned
     * - empty $search (and non-empty $str): $replacement is appended to $str
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        // Compare only the leading characters of $str against $search,
        // so the cost does not depend on the length of the whole string.
        $prefix = (string) \mb_substr($str, 0, (int) \mb_strlen($search));
        if ($prefix !== '' && \mb_stripos($prefix, $search) === 0) {
            // cut by the byte length of the matched prefix, the tail stays untouched
            return $replacement . \substr($str, \strlen($prefix));
        }

        return $str;
    }
6215
6216
    /**
     * Replaces $search at the ending of the string with $replacement (case-insensitive).
     *
     * The comparison is done per character (multibyte aware). A $search that is
     * longer than $str never matches and leaves $str unchanged.
     *
     * Special cases:
     * - empty $str and empty $replacement: '' is returned
     * - empty $str and empty $search: $replacement is returned
     * - empty $search (and non-empty $str): $replacement is appended to $str
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = (int) \mb_strlen($search);

        // Take the trailing characters of $str; if $str is shorter than $search
        // the length check below fails and no offset error can occur.
        $suffix = (string) \mb_substr($str, -$search_length);
        if (
            (int) \mb_strlen($suffix) === $search_length
            &&
            \mb_stripos($suffix, $search) === 0
        ) {
            // cut by the byte length of the matched suffix, the head stays untouched
            return \substr($str, 0, \strlen($str) - \strlen($suffix)) . $replacement;
        }

        return $str;
    }
6250
6251
    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * An empty $needle always yields true; otherwise an empty $haystack yields false.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // the empty needle is checked first, it wins even against an empty haystack
        if ($needle === '') {
            return true;
        }

        // a match only counts when it is found at position 0
        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
6278
6279
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $str or an empty $substrings list always yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        if ($substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so no reference is needed
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6310
6311
    /**
     * Gets the substring after the first occurrence of a separator (case-insensitive search).
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        // the offset must be measured in the same encoding that is used for the cut below
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6350
6351
    /**
     * Gets the substring after the last occurrence of a separator (case-insensitive search).
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::strripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        // the offset must be measured in the same encoding that is used for the cut below
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6390
6391
    /**
     * Gets the substring before the first occurrence of a separator (case-insensitive search).
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr($str, 0, $offset);
        }

        // the offset must be measured in the same encoding that is used for the cut below
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6422
6423
    /**
     * Gets the substring before the last occurrence of a separator (case-insensitive search).
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // "UTF-8" uses the native mbstring functions directly, everything else the class helpers
        $position = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        return $is_utf8
            ? (string) \mb_substr($str, 0, $position)
            : (string) self::substr($str, 0, $position, $encoding);
    }
6459
6460
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * The work is delegated to self::stristr(); a "false" result from it, as well
     * as an empty $str or $needle, is reported as an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $result = self::stristr($str, $needle, $before_needle, $encoding);

        return $result === false ? '' : $result;
    }
6498
6499
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * The work is delegated to self::strrichr(); a "false" result from it, as well
     * as an empty $str or $needle, is reported as an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $result = self::strrichr($str, $needle, $before_needle, $encoding);

        return $result === false ? '' : $result;
    }
6537
6538
    /**
     * Returns the last $n characters of the string.
     *
     * An empty $str or an $n of zero or less yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // other encodings are normalized first and handled by the class helper
            return (string) self::substr(
                $str,
                -$n,
                null,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        // a negative start counts from the end of the string
        return (string) \mb_substr($str, -$n);
    }
6566
6567
    /**
     * Limit the number of characters in a string.
     *
     * If $str is longer than $length it is cut so that the kept part plus
     * $str_add_on is $length characters long. If $str_add_on itself is longer
     * than $length, nothing of $str is kept and only $str_add_on is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // never pass a negative length: it would cut relative to the end of the string
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // measure the add-on in the same encoding as the string itself
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6606
6607
    /**
     * Limit the number of characters in a string, but also after the next word.
     *
     * The string is cut at $length characters and then shortened to the last
     * complete word (split on single spaces). If no space is left in the cut
     * part, the first ($length - 1) characters are used instead.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // the limit falls exactly on a space: cut right in front of it
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $truncated = \mb_substr($str, 0, $length);

            // drop the last (possibly incomplete) word
            $words = \explode(' ', $truncated, -1);
            $new_str = \implode(' ', $words);

            if ($new_str === '') {
                return ((string) \mb_substr($truncated, 0, $length - 1)) . $str_add_on;
            }

            return $new_str . $str_add_on;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // the limit falls exactly on a space: cut right in front of it
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '' . $str_add_on;
        }

        // drop the last (possibly incomplete) word
        $words = \explode(' ', $truncated, -1);
        $new_str = \implode(' ', $words);

        if ($new_str === '') {
            return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $str_add_on;
        }

        return $new_str . $str_add_on;
    }
6673
6674
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * The strings are compared character by character (case-sensitive),
     * starting at the first character, until the first difference.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // only the length of the shorter string needs to be checked
            $limit = (int) \min(
                \mb_strlen($str1),
                \mb_strlen($str2)
            );

            $pos = 0;
            while ($pos < $limit) {
                $char = \mb_substr($str1, $pos, 1);
                if ($char === false || $char !== \mb_substr($str2, $pos, 1)) {
                    break;
                }

                $prefix .= $char;
                ++$pos;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // only the length of the shorter string needs to be checked
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $pos = 0;
        while ($pos < $limit) {
            $char = self::substr($str1, $pos, 1, $encoding);
            if ($char === false || $char !== self::substr($str2, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$pos;
        }

        return $prefix;
    }
6737
6738
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * Both strings are split into characters once; afterwards only two rows of
     * the classic table are kept in memory at any time.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // decided after the normalization: an alias of UTF-8 takes the native path, too
        $is_utf8 = $encoding === 'UTF-8';

        // extract every character exactly once instead of once per table cell
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $is_utf8
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $is_utf8
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match so far
        $end = 0; // end position (exclusive) of the best match within $str1

        // $previous_row[$j] = length of the common run ending at ($i - 1, $j)
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $current_row = [0];
            $str_char = $str_chars[$i - 1];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;

                    // strictly greater: on ties the first occurrence wins
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($is_utf8) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6828
6829
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * The strings are compared character by character (case-sensitive),
     * starting at the last character, until the first difference.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            // only the length of the shorter string needs to be checked
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            $distance = 1; // distance from the end of both strings
            while ($distance <= $limit) {
                $char = \mb_substr($str1, -$distance, 1);
                if ($char === false || $char !== \mb_substr($str2, -$distance, 1)) {
                    break;
                }

                $suffix = $char . $suffix;
                ++$distance;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // only the length of the shorter string needs to be checked
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $distance = 1; // distance from the end of both strings
        while ($distance <= $limit) {
            $char = self::substr($str1, -$distance, 1, $encoding);
            if ($char === false || $char !== self::substr($str2, -$distance, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$distance;
        }

        return $suffix;
    }
6895
6896
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is passed without delimiters; it is wrapped in "/" delimiters
     * and evaluated with the "u" (UTF-8) modifier. A "/" inside the pattern may
     * be given escaped ("\/") or unescaped, unescaped ones are escaped here.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        if (\strpos($pattern, '/') !== false) {
            // Escape every "/" that is preceded by an even number of backslashes
            // (= not escaped yet), otherwise it would terminate the delimiter early.
            $pattern = (string) \preg_replace('~(?<!\\\\)(?:\\\\\\\\)*\\K/~', '\\\\/', $pattern);
        }

        return (bool) \preg_match('/' . $pattern . '/u', $str);
    }
6911
6912
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // negative offsets reach back at most "$char_count" characters,
        // non-negative offsets must stay below "$char_count"
        return $offset < 0
            ? $char_count >= \abs($offset)
            : $char_count > $offset;
    }
6937
6938
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * The bounds check and the character lookup both use $encoding.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // same bounds rule as before, but measured in the requested encoding
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6970
6971
    /**
6972
     * Pad a UTF-8 string to a given length with another string.
6973
     *
6974
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
6975
     *
6976
     * @param string     $str        <p>The input string.</p>
6977
     * @param int        $pad_length <p>The length of return string.</p>
6978
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
6979
     * @param int|string $pad_type   [optional] <p>
6980
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
6981
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
6982
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
6983
     *                               </p>
6984
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
6985
     *
6986
     * @psalm-pure
6987
     *
6988
     * @return string
6989
     *                <p>Returns the padded string.</p>
6990
     */
6991
    public static function str_pad(
6992
        string $str,
6993
        int $pad_length,
6994
        string $pad_string = ' ',
6995
        $pad_type = \STR_PAD_RIGHT,
6996
        string $encoding = 'UTF-8'
6997
    ): string {
6998 41
        if ($pad_length === 0 || $pad_string === '') {
6999 1
            return $str;
7000
        }
7001
7002 41
        if ($pad_type !== (int) $pad_type) {
7003 13
            if ($pad_type === 'left') {
7004 3
                $pad_type = \STR_PAD_LEFT;
7005 10
            } elseif ($pad_type === 'right') {
7006 6
                $pad_type = \STR_PAD_RIGHT;
7007 4
            } elseif ($pad_type === 'both') {
7008 3
                $pad_type = \STR_PAD_BOTH;
7009
            } else {
7010 1
                throw new \InvalidArgumentException(
7011 1
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
7012
                );
7013
            }
7014
        }
7015
7016 40
        if ($encoding === 'UTF-8') {
7017 25
            $str_length = (int) \mb_strlen($str);
7018
7019 25
            if ($pad_length >= $str_length) {
7020
                switch ($pad_type) {
7021 25
                    case \STR_PAD_LEFT:
7022 8
                        $ps_length = (int) \mb_strlen($pad_string);
7023
7024 8
                        $diff = ($pad_length - $str_length);
7025
7026 8
                        $pre = (string) \mb_substr(
7027 8
                            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7028 8
                            0,
7029 8
                            $diff
7030
                        );
7031 8
                        $post = '';
7032
7033 8
                        break;
7034
7035 20
                    case \STR_PAD_BOTH:
7036 14
                        $diff = ($pad_length - $str_length);
7037
7038 14
                        $ps_length_left = (int) \floor($diff / 2);
7039
7040 14
                        $ps_length_right = (int) \ceil($diff / 2);
7041
7042 14
                        $pre = (string) \mb_substr(
7043 14
                            \str_repeat($pad_string, $ps_length_left),
7044 14
                            0,
7045 14
                            $ps_length_left
7046
                        );
7047 14
                        $post = (string) \mb_substr(
7048 14
                            \str_repeat($pad_string, $ps_length_right),
7049 14
                            0,
7050 14
                            $ps_length_right
7051
                        );
7052
7053 14
                        break;
7054
7055 9
                    case \STR_PAD_RIGHT:
7056
                    default:
7057 9
                        $ps_length = (int) \mb_strlen($pad_string);
7058
7059 9
                        $diff = ($pad_length - $str_length);
7060
7061 9
                        $post = (string) \mb_substr(
7062 9
                            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7063 9
                            0,
7064 9
                            $diff
7065
                        );
7066 9
                        $pre = '';
7067
                }
7068
7069 25
                return $pre . $str . $post;
7070
            }
7071
7072 3
            return $str;
7073
        }
7074
7075 15
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
7076
7077 15
        $str_length = (int) self::strlen($str, $encoding);
7078
7079 15
        if ($pad_length >= $str_length) {
7080
            switch ($pad_type) {
7081 14
                case \STR_PAD_LEFT:
7082 5
                    $ps_length = (int) self::strlen($pad_string, $encoding);
7083
7084 5
                    $diff = ($pad_length - $str_length);
7085
7086 5
                    $pre = (string) self::substr(
7087 5
                        \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7088 5
                        0,
7089 5
                        $diff,
7090 5
                        $encoding
7091
                    );
7092 5
                    $post = '';
7093
7094 5
                    break;
7095
7096 9
                case \STR_PAD_BOTH:
7097 3
                    $diff = ($pad_length - $str_length);
7098
7099 3
                    $ps_length_left = (int) \floor($diff / 2);
7100
7101 3
                    $ps_length_right = (int) \ceil($diff / 2);
7102
7103 3
                    $pre = (string) self::substr(
7104 3
                        \str_repeat($pad_string, $ps_length_left),
7105 3
                        0,
7106 3
                        $ps_length_left,
7107 3
                        $encoding
7108
                    );
7109 3
                    $post = (string) self::substr(
7110 3
                        \str_repeat($pad_string, $ps_length_right),
7111 3
                        0,
7112 3
                        $ps_length_right,
7113 3
                        $encoding
7114
                    );
7115
7116 3
                    break;
7117
7118 6
                case \STR_PAD_RIGHT:
7119
                default:
7120 6
                    $ps_length = (int) self::strlen($pad_string, $encoding);
7121
7122 6
                    $diff = ($pad_length - $str_length);
7123
7124 6
                    $post = (string) self::substr(
7125 6
                        \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7126 6
                        0,
7127 6
                        $diff,
7128 6
                        $encoding
7129
                    );
7130 6
                    $pre = '';
7131
            }
7132
7133 14
            return $pre . $str . $post;
7134
        }
7135
7136 1
        return $str;
7137
    }
7138
7139
    /**
7140
     * Returns a new string of a given length such that both sides of the
7141
     * string are padded. Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
7142
     *
7143
     * @param string $str
7144
     * @param int    $length   <p>Desired string length after padding.</p>
7145
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
7146
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7147
     *
7148
     * @psalm-pure
7149
     *
7150
     * @return string
7151
     *                <p>The string with padding applied.</p>
7152
     */
7153
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Thin convenience wrapper: everything is delegated to str_pad(),
        // with the padding mode pinned to STR_PAD_BOTH.
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
7167
7168
    /**
7169
     * Returns a new string of a given length such that the beginning of the
7170
     * string is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
7171
     *
7172
     * @param string $str
7173
     * @param int    $length   <p>Desired string length after padding.</p>
7174
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
7175
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7176
     *
7177
     * @psalm-pure
7178
     *
7179
     * @return string
7180
     *                <p>The string with left padding.</p>
7181
     */
7182
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Thin convenience wrapper: everything is delegated to str_pad(),
        // with the padding mode pinned to STR_PAD_LEFT.
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
7196
7197
    /**
7198
     * Returns a new string of a given length such that the end of the string
7199
     * is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
7200
     *
7201
     * @param string $str
7202
     * @param int    $length   <p>Desired string length after padding.</p>
7203
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
7204
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7205
     *
7206
     * @psalm-pure
7207
     *
7208
     * @return string
7209
     *                <p>The string with right padding.</p>
7210
     */
7211
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Thin convenience wrapper: everything is delegated to str_pad(),
        // with the padding mode pinned to STR_PAD_RIGHT.
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
7225
7226
    /**
7227
     * Repeat a string.
7228
     *
7229
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
7230
     *
7231
     * @param string $str        <p>
7232
     *                           The string to be repeated.
7233
     *                           </p>
7234
     * @param int    $multiplier <p>
7235
     *                           Number of time the input string should be
7236
     *                           repeated.
7237
     *                           </p>
7238
     *                           <p>
7239
     *                           multiplier has to be greater than or equal to 0.
7240
     *                           If the multiplier is set to 0, the function
7241
     *                           will return an empty string.
7242
     *                           </p>
7243
     *
7244
     * @psalm-pure
7245
     *
7246
     * @return string
7247
     *                <p>The repeated string.</p>
7248
     */
7249
    public static function str_repeat(string $str, int $multiplier): string
    {
        // The input is passed through the class's own filter() first; the
        // filtered value is then repeated by PHP's native str_repeat().
        $filtered = self::filter($str);

        return \str_repeat($filtered, $multiplier);
    }
7255
7256
    /**
7257
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
7258
     *
7259
     * Replace all occurrences of the search string with the replacement string
7260
     *
7261
     * @see http://php.net/manual/en/function.str-replace.php
7262
     *
7263
     * @param string|string[] $search  <p>
7264
     *                                 The value being searched for, otherwise known as the needle.
7265
     *                                 An array may be used to designate multiple needles.
7266
     *                                 </p>
7267
     * @param string|string[] $replace <p>
7268
     *                                 The replacement value that replaces found search
7269
     *                                 values. An array may be used to designate multiple replacements.
7270
     *                                 </p>
7271
     * @param string|string[] $subject <p>
7272
     *                                 The string or array of strings being searched and replaced on,
7273
     *                                 otherwise known as the haystack.
7274
     *                                 </p>
7275
     *                                 <p>
7276
     *                                 If subject is an array, then the search and
7277
     *                                 replace is performed with every entry of
7278
     *                                 subject, and the return value is an array as
7279
     *                                 well.
7280
     *                                 </p>
7281
     * @param int|null        $count   [optional] <p>
7282
     *                                 If passed, this will hold the number of matched and replaced needles.
7283
     *                                 </p>
7284
     *
7285
     * @psalm-pure
7286
     *
7287
     * @return string|string[]
7288
     *                         <p>This function returns a string or an array with the replaced values.</p>
7289
     *
7290
     * @template TStrReplaceSubject
7291
     * @phpstan-param TStrReplaceSubject $subject
7292
     * @phpstan-return TStrReplaceSubject
7293
     *
7294
     * @deprecated please use \str_replace() instead
7295
     */
7296
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // Pure pass-through to the native function; $count is forwarded by
        // reference so the caller receives the number of replacements.
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7315
7316
    /**
7317
     * Replaces $search from the beginning of string with $replacement.
7318
     *
7319
     * @param string $str         <p>The input string.</p>
7320
     * @param string $search      <p>The string to search for.</p>
7321
     * @param string $replacement <p>The replacement.</p>
7322
     *
7323
     * @psalm-pure
7324
     *
7325
     * @return string
7326
     *                <p>A string after the replacements.</p>
7327
     */
7328
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle never goes through the prefix comparison: the
        // replacement is simply appended to the input. With an empty input
        // this yields just the replacement (or '' when both are empty).
        // NOTE(review): the replacement is appended, not prepended, for an
        // empty $search - confirm this is the intended contract.
        if ($search === '') {
            return $str . $replacement;
        }

        // Byte-wise prefix test; an empty $str can never match a non-empty needle.
        $prefix_bytes = \strlen($search);
        $has_prefix = \strncmp($str, $search, $prefix_bytes) === 0;

        return $has_prefix
            ? $replacement . \substr($str, $prefix_bytes)
            : $str;
    }
7354
7355
    /**
7356
     * Replaces $search from the ending of string with $replacement.
7357
     *
7358
     * @param string $str         <p>The input string.</p>
7359
     * @param string $search      <p>The string to search for.</p>
7360
     * @param string $replacement <p>The replacement.</p>
7361
     *
7362
     * @psalm-pure
7363
     *
7364
     * @return string
7365
     *                <p>A string after the replacements.</p>
7366
     */
7367
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle is never compared: the replacement is appended.
        // With an empty input this yields just the replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        $str_bytes = \strlen($str);
        $search_bytes = \strlen($search);

        // A needle longer than the haystack can never be its suffix.
        // Checking this up front avoids handing strpos() a negative offset
        // that lies outside the haystack (ValueError on PHP 8, warning on PHP 7).
        if ($search_bytes > $str_bytes) {
            return $str;
        }

        // Byte-wise suffix comparison: cut the tail of exactly the needle's length.
        $tail_start = $str_bytes - $search_bytes;
        if (\substr($str, $tail_start) === $search) {
            return \substr($str, 0, $tail_start) . $replacement;
        }

        return $str;
    }
7392
7393
    /**
7394
     * Replace the first "$search"-term with the "$replace"-term.
7395
     *
7396
     * @param string $search
7397
     * @param string $replace
7398
     * @param string $subject
7399
     *
7400
     * @psalm-pure
7401
     *
7402
     * @return string
7403
     *
7404
     * @psalm-suppress InvalidReturnType
7405
     */
7406
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        // Locate the first occurrence with the class's own strpos().
        $first_pos = self::strpos($subject, $search);

        // Nothing found: hand the subject back untouched.
        if ($first_pos === false) {
            return $subject;
        }

        // Swap exactly the matched stretch (measured with the class's own strlen()).
        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, $search_length);
    }
7427
7428
    /**
7429
     * Replace the last "$search"-term with the "$replace"-term.
7430
     *
7431
     * @param string $search
7432
     * @param string $replace
7433
     * @param string $subject
7434
     *
7435
     * @psalm-pure
7436
     *
7437
     * @return string
7438
     *
7439
     * @psalm-suppress InvalidReturnType
7440
     */
7441
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        // Locate the last occurrence with the class's own strrpos().
        $last_pos = self::strrpos($subject, $search);

        // Nothing found: hand the subject back untouched.
        if ($last_pos === false) {
            return $subject;
        }

        // Swap exactly the matched stretch (measured with the class's own strlen()).
        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, $search_length);
    }
7461
7462
    /**
7463
     * Shuffles all the characters in the string.
7464
     *
7465
     * INFO: uses random algorithm which is weak for cryptography purposes
7466
     *
7467
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
7468
     *
7469
     * @param string $str      <p>The input string</p>
7470
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7471
     *
7472
     * @return string
7473
     *                <p>The shuffled string.</p>
7474
     */
7475
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // The literal 'UTF-8' takes the native mb_* route; any other value is
        // normalized first and then handled by the class's own strlen()/substr().
        $use_native_mb = $encoding === 'UTF-8';
        if (!$use_native_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $char_count = $use_native_mb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // Shuffle the character positions rather than the characters themselves.
        $positions = \range(0, $char_count - 1);
        \shuffle($positions);

        // Rebuild the string by reading one character per shuffled position.
        $shuffled_str = '';
        foreach ($positions as $position) {
            $char = $use_native_mb
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($char !== false) {
                $shuffled_str .= $char;
            }
        }

        return $shuffled_str;
    }
7509
7510
    /**
7511
     * Returns the substring beginning at $start, and up to, but not including
7512
     * the index specified by $end. If $end is omitted, the function extracts
7513
     * the remaining string. If $end is negative, it is computed from the end
7514
     * of the string.
7515
     *
7516
     * @param string   $str
7517
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
7518
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
7519
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7520
     *
7521
     * @psalm-pure
7522
     *
7523
     * @return false|string
7524
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
7525
     *                      characters long, <b>FALSE</b> will be returned.
7526
     */
7527
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // A non-negative end at or before the start can never enclose a character.
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        // The literal 'UTF-8' takes the native mb_* route; any other value is
        // normalized first and then handled by the class's own strlen()/substr().
        $use_native_mb = $encoding === 'UTF-8';
        if (!$use_native_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $use_native_mb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($end === null) {
            // No end given: take everything from $start on. The full string
            // length is always a sufficient upper bound for the substring length.
            $offset = $start;
            $length = $str_length;
        } else {
            // Resolve both indices to absolute positions BEFORE subtracting.
            // Mixing a negative index with a non-negative one (or two
            // negative ones) would otherwise yield a wrong or negative
            // length, and a negative length means "trim from the end" to substr.
            $offset = $start < 0 ? \max(0, $str_length + $start) : $start;
            $stop = $end < 0 ? \max(0, $str_length + $end) : $end;

            if ($stop <= $offset) {
                return '';
            }

            $length = $stop - $offset;
        }

        return $use_native_mb
            ? \mb_substr($str, $offset, $length)
            : self::substr($str, $offset, $length, $encoding);
    }
7561
7562
    /**
7563
     * Convert a string to e.g.: "snake_case"
7564
     *
7565
     * @param string $str
7566
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7567
     *
7568
     * @psalm-pure
7569
     *
7570
     * @return string
7571
     *                <p>A string in snake_case.</p>
7572
     */
7573
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Step 1: normalize whitespace, then treat dashes like underscores.
        $whitespace_normalized = self::normalize_whitespace($str);
        $str = \str_replace('-', '_', $whitespace_normalized);

        // 'UTF-8' and 'CP850' are used as given; anything else is normalized.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Step 2: put an underscore in front of every character the pattern
        // below matches (numbers and uppercase letters).
        // NOTE(review): the "|" sits inside the character class, so a literal
        // pipe is matched as well - confirm whether that is intended.
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $separate_match = static function (array $matches) use ($encoding): string {
            $char = $matches[1];

            // A match that survives an int round-trip unchanged is wrapped in
            // underscores on both sides and kept as is.
            if ((string) ((int) $char) === $char) {
                return '_' . $char . '_';
            }

            // Everything else is lower-cased and prefixed with one underscore.
            $lowered = $encoding === 'UTF-8'
                ? \mb_strtolower($char)
                : self::strtolower($char, $encoding);

            return '_' . $lowered;
        };

        $str = (string) \preg_replace_callback('/([\\p{N}|\\p{Lu}])/u', $separate_match, $str);

        // Step 3: the patterns are applied one after another, in this order.
        $cleanup_patterns = [
            '/\\s+/u',           // convert spaces to "_"
            '/^\\s+|\\s+$/u', // trim leading & trailing spaces
            '/_+/',                 // remove double "_"
        ];
        $cleanup_replacements = ['_', '', '_'];

        $str = (string) \preg_replace($cleanup_patterns, $cleanup_replacements, $str);

        // Step 4: strip underscores at both ends first, then whitespace.
        $without_edge_underscores = \trim($str, '_');

        return \trim($without_edge_underscores);
    }
7631
7632
    /**
7633
     * Sort all characters according to code points.
7634
     *
7635
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
7636
     *
7637
     * @param string $str    <p>A UTF-8 string.</p>
7638
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
7639
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
7640
     *
7641
     * @psalm-pure
7642
     *
7643
     * @return string
7644
     *                <p>A string of sorted characters.</p>
7645
     */
7646
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        // Work on the code points produced by the class's codepoints() helper.
        /** @var int[] $code_points */
        $code_points = self::codepoints($str);

        // Flipping twice collapses repeated values, because a value can only
        // exist once as an array key.
        if ($unique) {
            $code_points = \array_flip(\array_flip($code_points));
        }

        // Both sort functions keep the key association and sort in place.
        $desc ? \arsort($code_points) : \asort($code_points);

        // Hand the ordered code points to the class's string() helper.
        return self::string($code_points);
    }
7663
7664
    /**
7665
     * Convert a string to an array of Unicode characters.
7666
     *
7667
     * EXAMPLE: <code>
7668
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
7669
     * </code>
7670
     *
7671
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
7672
     * @param int            $length                  [optional] <p>Max character length of each array
7673
     *                                                lement.</p>
7674
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
7675
     *                                                string.</p>
7676
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
7677
     *                                                "mb_substr"</p>
7678
     *
7679
     * @psalm-pure
7680
     *
7681
     * @return string[][]
7682
     *                    <p>An array containing chunks of the input.</p>
7683
     */
7684
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split every element on its own; array_map() with a single array
        // keeps the original keys, so the result mirrors the input's shape.
        /** @var string[][] $chunked */
        $chunked = \array_map(
            static function ($element) use ($length, $clean_utf8, $try_to_use_mb_functions) {
                return self::str_split(
                    $element,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            },
            $input
        );

        return $chunked;
    }
7702
7703
    /**
7704
     * Convert a string to an array of unicode characters.
7705
     *
7706
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
7707
     *
7708
     * @param int|string $input                   <p>The string or int to split into array.</p>
7709
     * @param int        $length                  [optional] <p>Max character length of each array
7710
     *                                            element.</p>
7711
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
7712
     *                                            string.</p>
7713
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
7714
     *                                            "mb_substr"</p>
7715
     *
7716
     * @psalm-pure
7717
     *
7718
     * @return string[]
7719
     *                  <p>An array containing chunks of chars from the input.</p>
7720
     */
7721
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // A chunk size below 1 can never produce a chunk.
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /** @psalm-suppress InvalidReturnStatement */
            /** @phpstan-ignore-next-line - old code :/ */
            return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        // From here on only a string is handled.
        $text = (string) $input;
        if ($text === '') {
            return [];
        }

        if ($clean_utf8) {
            $text = self::clean($text);
        }

        if ($try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true) {
            // Preferred route: the native splitter already honours $length,
            // so its result can be returned as is.
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $native_chunks = \mb_str_split($text, $length);
                if ($native_chunks !== false) {
                    return $native_chunks;
                }
            }

            // Otherwise collect single characters: one mb_substr() call per
            // character for up to 127 characters, a single regex match beyond that.
            $char_count = \mb_strlen($text);
            if ($char_count <= 127) {
                $chars = [];
                for ($index = 0; $index < $char_count; ++$index) {
                    $chars[] = \mb_substr($text, $index, 1);
                }
            } else {
                $matches = [];
                \preg_match_all('/./us', $text, $matches);
                $chars = $matches[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            // No mbstring (or not wanted): one regex match per character.
            $matches = [];
            \preg_match_all('/./us', $text, $matches);
            $chars = $matches[0] ?? [];
        } else {
            // fallback: walk the bytes and group them by their bit patterns.
            // A lead byte without the expected continuation bytes adds nothing.
            $chars = [];
            $byte_count = \strlen($text);

            for ($pos = 0; $pos < $byte_count; ++$pos) {
                if (($text[$pos] & "\x80") === "\x00") {
                    // high bit clear: single-byte character
                    $chars[] = $text[$pos];
                } elseif (
                    isset($text[$pos + 1])
                    &&
                    ($text[$pos] & "\xE0") === "\xC0"
                ) {
                    // lead byte of a two-byte sequence
                    if (($text[$pos + 1] & "\xC0") === "\x80") {
                        $chars[] = $text[$pos] . $text[$pos + 1];

                        ++$pos;
                    }
                } elseif (
                    isset($text[$pos + 2])
                    &&
                    ($text[$pos] & "\xF0") === "\xE0"
                ) {
                    // lead byte of a three-byte sequence
                    if (
                        ($text[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($text[$pos + 2] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $text[$pos] . $text[$pos + 1] . $text[$pos + 2];

                        $pos += 2;
                    }
                } elseif (
                    isset($text[$pos + 3])
                    &&
                    ($text[$pos] & "\xF8") === "\xF0"
                ) {
                    // lead byte of a four-byte sequence
                    if (
                        ($text[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($text[$pos + 2] & "\xC0") === "\x80"
                        &&
                        ($text[$pos + 3] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $text[$pos] . $text[$pos + 1] . $text[$pos + 2] . $text[$pos + 3];

                        $pos += 3;
                    }
                }
            }
        }

        // Chunks longer than one character: group the characters and glue
        // every group back together.
        if ($length > 1) {
            return \array_map(
                static function (array $group): string {
                    return \implode('', $group);
                },
                \array_chunk($chars, $length)
            );
        }

        // A leading empty string means nothing usable was extracted.
        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }
7856
7857
    /**
7858
     * Splits the string with the provided regular expression, returning an
7859
     * array of strings. An optional integer $limit will truncate the
7860
     * results.
7861
     *
7862
     * @param string $str
7863
     * @param string $pattern <p>The regex with which to split the string.</p>
7864
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
7865
     *
7866
     * @psalm-pure
7867
     *
7868
     * @return string[]
7869
     *                  <p>An array of strings.</p>
7870
     */
7871
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        // A limit of zero asks for no results at all.
        if ($limit === 0) {
            return [];
        }

        // Without a pattern there is nothing to split on.
        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // mb_split() always splits completely; a positive limit is
            // applied afterwards by keeping only the leading pieces.
            $pieces = \mb_split($pattern, $str);
            if ($pieces === false) {
                return [];
            }

            return $limit < 0 ? $pieces : \array_slice($pieces, 0, $limit);
        }

        // preg_split() puts the unsplit remainder into its last element, so
        // one extra element is requested and dropped again further down.
        $preg_limit = $limit > 0 ? $limit + 1 : -1;

        $pieces = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $preg_limit);
        if ($pieces === false) {
            return [];
        }

        if ($preg_limit > 0 && \count($pieces) === $preg_limit) {
            \array_pop($pieces);
        }

        return $pieces;
    }
7926
7927
    /**
     * Tells whether "$haystack" begins with "$needle" (case-sensitive, byte-wise comparison).
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>An empty needle always matches; an empty haystack only matches an empty needle.</p>
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // every string starts with the empty string
        if ($needle === '') {
            return true;
        }

        // nothing (but the empty string) can be found at the start of an empty string
        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // compare only the first "strlen($needle)" bytes
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
7959
7960
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the $substrings;
     *              always false for an empty $str or an empty list.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified here, so a by-reference loop would only
        // force a copy of the array and leave a dangling reference behind
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
7991
7992
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything behind the first separator, or an empty string if the input
     *                or the separator is empty or the separator was not found.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // fast path: use the native "mb_*" functions directly for UTF-8
        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class wrapper (like the sibling "str_substr_*_separator" methods do),
        // instead of handing the caller's raw encoding name to "mb_substr()"
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8033
8034
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything behind the last separator, or an empty string if the input
     *                or the separator is empty or the separator was not found.</p>
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // locate the last separator
        $position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        // skip the separator itself and keep the rest
        if ($is_utf8) {
            $tail = \mb_substr($str, $position + (int) \mb_strlen($separator));
        } else {
            $tail = self::substr(
                $str,
                $position + (int) self::strlen($separator, $encoding),
                null,
                $encoding
            );
        }

        return (string) $tail;
    }
8078
8079
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything in front of the first separator, or an empty string if the input
     *                or the separator is empty or the separator was not found.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // locate the first separator
        $position = $is_utf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        // keep everything up to (but not including) the separator
        $head = $is_utf8
            ? \mb_substr($str, 0, $position)
            : self::substr($str, 0, $position, $encoding);

        return (string) $head;
    }
8124
8125
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything in front of the last separator, or an empty string if the input
     *                or the separator is empty or the separator was not found.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // locate the last separator
        $position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        // the encoding name is normalized before it is handed to the substr-wrapper
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $position, $encoding);
    }
8169
8170
    /**
     * Gets the part of the string starting at the first occurrence of "$needle"
     * (or the part in front of it, if "$before_needle" is true).
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The found part, or an empty string if the input or the needle is empty
     *                or the needle was not found.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // native call for UTF-8, class wrapper for everything else
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8216
8217
    /**
     * Gets the part of the string starting at the last occurrence of "$needle"
     * (or the part in front of it, if "$before_needle" is true).
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The found part, or an empty string if the input or the needle is empty
     *                or the needle was not found.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // native call for UTF-8, class wrapper for everything else
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8263
8264
    /**
     * Wraps $str into the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
8279
8280
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that (in addition to
     *                                                           whitespace) separate words from each other.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are only good enough without language special cases
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // compare against null / '' explicitly: a truthiness check would silently ignore the separator "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        // a "word" is every run of chars that are neither whitespace nor one of the extra separator chars
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are kept exactly as they are
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8375
8376
    /**
     * Convert a string into an obfuscated string: a randomly chosen share of its chars
     * is replaced by "$obfuscateChar".
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('user@example.org', 0.5, '*', ['@', '.']); // e.g. "u*e*@*xa**le.o*g"
     * </code>
     *
     * NOTE(review): the result is randomized via "random_int()", so the "@psalm-pure"
     * annotation below looks questionable - confirm before relying on it.
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the chars to pick (0.5 === 50%). Values above 1 behave like 1.</p>
     * @param string   $obfuscateChar <p>The replacement char.</p>
     * @param string[] $keepChars     <p>Chars that are never replaced, even when they are picked.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscated string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // mask pre-existing occurrences of the obfuscate-char, they are restored at the very end
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);

        // chars that are identical to the obfuscate-char are never picked
        $charsPickable = 0;
        foreach ($chars as $char) {
            if ($char !== $obfuscateChar) {
                ++$charsPickable;
            }
        }

        // never request more picks than there are pickable chars, otherwise the loop
        // below could not terminate (e.g. for "$percent > 1")
        $charsMaxChange = \min(\round($charsMax * $percent), $charsPickable);
        $charsCounter = 0;
        $charKeyDone = [];

        // sweep over the chars again and again, picking each remaining char with ~50% probability
        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if ($charsCounter >= $charsMaxChange) {
                    break;
                }

                if (isset($charKeyDone[$charKey]) || $char === $obfuscateChar) {
                    continue;
                }

                if (\random_int(0, 100) > 50) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                // protected chars count as "picked", but stay untouched
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
8442
8443
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize. The entries are inserted into
     *                         the regular expressions as they are (they are not quoted).</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // alternation of the built-in "small words" (regex fragments) plus the caller's extra words
        $small_words_rx = \implode(
            '|',
            \array_merge(
                [
                    '(?<!q&)a',
                    'an',
                    'and',
                    'as',
                    'at(?!&t)',
                    'but',
                    'by',
                    'en',
                    'for',
                    'if',
                    'in',
                    'of',
                    'on',
                    'or',
                    'the',
                    'to',
                    'v[.]?',
                    'via',
                    'vs[.]?',
                ],
                $ignore
            )
        );
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lowercase char is lower-cased as a whole, first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // shared helper: upper-case the first char of a word
        $capitalize = static function (string $word) use ($encoding): string {
            return static::ucfirst($word, $encoding);
        };

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups) use ($encoding, $capitalize): string {
                if ($groups[2]) {
                    // URLs, domains, emails and file paths are kept as they are
                    $word = $groups[2];
                } elseif ($groups[3]) {
                    // small words are lower-cased
                    $word = self::strtolower($groups[3], $encoding);
                } elseif ($groups[4]) {
                    // words without internal caps are capitalized
                    $word = $capitalize($groups[4]);
                } else {
                    // every other kind of word (iPhone) is kept as it is
                    $word = $groups[5];
                }

                // leading + trailing underscores are preserved
                return $groups[1] . $word . $groups[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups) use ($capitalize): string {
                return $groups[1] . $capitalize($groups[2]);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups) use ($capitalize): string {
                return $capitalize($groups[1]);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups) use ($capitalize): string {
                return $capitalize($groups[1]);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below tests for the ellipsis char "…", although its
        // inline comment talks about a hyphen - confirm what is intended before touching it.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $groups
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $groups) use ($capitalize): string {
                return $groups[1] . $capitalize($groups[2]);
            },
            $str
        );

        return $str;
    }
8633
8634
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one big binary number, so leading zero bits
     * are not part of the result (an empty string results in '0').
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The binary digits. "false" stays part of the documented contract for
     *                      backward compatibility, but this implementation never returns it.</p>
     */
    public static function str_to_binary(string $str)
    {
        // convert byte by byte: pushing the whole string through "base_convert()" loses
        // precision as soon as the number no longer fits into a float (roughly 7+ bytes)
        $bits = '';
        $byte_count = \strlen($str);
        for ($i = 0; $i < $byte_count; ++$i) {
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // keep the established output format: a number without leading zeros
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8657
8658
    /**
     * Splits a string into an array of lines.
     *
     * A run of one or two line-break chars ("\r" / "\n") is used as one delimiter.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "nothing to split" and for a failed split
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        // NOTE(review): "{1,2}" also swallows two identical line breaks ("\n\n") as a single
        // delimiter, so a blank line between two lines yields no empty entry - confirm this is wanted.
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        // filter only on request
        if ($remove_empty_values || $remove_short_values !== null) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }
8697
8698
    /**
     * Convert a string into an array of words.
     *
     * The words are used as (captured) delimiters, so the result alternates between the
     * text in front of / between / behind the words and the words themselves.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result for "nothing to split" and for a failed split
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        $char_list = self::rxClass($char_list, '\pL');

        $pieces = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            return $nothing;
        }

        // without any filter the raw split result is the answer
        if (!$remove_empty_values && $remove_short_values === null) {
            return $pieces;
        }

        $filtered = self::reduce_string_array(
            $pieces,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every remaining entry is a string (the keys are kept)
        return \array_map(
            static function ($piece): string {
                return (string) $piece;
            },
            $filtered
        );
    }
8749
8750
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring alone is already as long as (or longer than) the desired length,
     * nothing of the input is kept and only the substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // fast path: use the native "mb_*" functions directly for UTF-8
        if ($encoding === 'UTF-8') {
            // the string already fits: nothing to truncate, nothing to append
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // reserve room for the substring, but never go below zero: a negative length
                // would make "mb_substr()" cut from the end and return far too much
                $length = \max(0, $length - (int) \mb_strlen($substring));
            }

            return ((string) \mb_substr($str, 0, $length)) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // same clamping as in the UTF-8 path
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return ((string) self::substr($str, 0, $length, $encoding)) . $substring;
    }
8809
8810
    /**
     * Shortens a string to at most $length characters without cutting a word
     * in half. When the string has to be shortened and $substring is given,
     * room for $substring is reserved first, so that it can be appended
     * without exceeding the desired length.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default: ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            // the whole string fits: nothing to do
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve room for the substring that gets appended
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            /** @var false|string $head - needed for PhpStan (stubs error) */
            $head = \mb_substr($str, 0, $length);
            if ($head === false) {
                return '';
            }

            // first space at (or after) the last kept character; when it sits
            // exactly at $length the cut fell on a word boundary
            $next_space = \mb_strpos($str, ' ', $length - 1);
            if ($next_space !== $length) {
                // the last word was cut: look for the last space inside the kept part
                $last_space = \mb_strrpos($head, ' ', 0);

                $cut_back = $last_space !== false
                            ||
                            ($next_space !== false && !$ignore_do_not_split_words_for_one_word);

                if ($cut_back) {
                    $head = (string) \mb_substr($head, 0, (int) $last_space);
                }
            }

            return $head . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // the whole string fits: nothing to do
        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $head = self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // first space at (or after) the last kept character; when it sits
        // exactly at $length the cut fell on a word boundary
        $next_space = self::strpos($str, ' ', $length - 1, $encoding);
        if ($next_space !== $length) {
            // the last word was cut: look for the last space inside the kept part
            $last_space = self::strrpos($head, ' ', 0, $encoding);

            $cut_back = $last_space !== false
                        ||
                        ($next_space !== false && !$ignore_do_not_split_words_for_one_word);

            if ($cut_back) {
                $head = (string) self::substr($head, 0, (int) $last_space, $encoding);
            }
        }

        return $head . $substring;
    }
8916
8917
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: thin wrapper around UTF8::str_delimit() with "_" as delimiter
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8934
8935
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: the string is camelized via UTF8::str_camelize() first, then its
     *       first character is upper-cased via UTF8::ucfirst()
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8962
8963
    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string, or the words themselves (format 1 and 2).</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the words are read from the odd indexes of this list,
        // the parts between them from the even indexes
        $parts = self::str_to_words($str, $char_list);
        $total = \count($parts);

        if ($format === 1) {
            $words = [];
            for ($idx = 1; $idx < $total; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        if ($format === 2) {
            $words = [];

            // the first word starts right after the leading non-word part
            $position = (int) self::strlen($parts[0]);
            for ($idx = 1; $idx < $total; $idx += 2) {
                $words[$position] = $parts[$idx];

                // skip the word itself plus the non-word part that follows it
                $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
            }

            return $words;
        }

        return (int) (($total - 1) / 2);
    }
9020
9021
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(); both strings are passed
     *       through UTF8::strtocasefold() before they are compared
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
9063
9064
    /**
     * Case-sensitive string comparison.
     *
     * INFO: both strings are normalized to NFD before they are compared, so
     *       canonically equivalent sequences compare as equal; a string that
     *       cannot be normalized (e.g. invalid UTF-8) is compared as it is
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical bytes are always equal, no normalization needed
        if ($str1 === $str2) {
            return 0;
        }

        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        if (!\is_string($normalized1)) {
            // normalize() reports failure with a non-string result:
            // fall back to the raw bytes instead of comparing that value
            $normalized1 = $str1;
        }

        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);
        if (!\is_string($normalized2)) {
            $normalized2 = $str2;
        }

        return \strcmp($normalized1, $normalized2);
    }
9092
9093
    /**
     * Find length of initial segment not matching mask.
     *
     * INFO: $offset and $length select the part of $str that is inspected,
     *       the returned length is relative to the start of that part
     *
     * @param string   $str       <p>The input string.</p>
     * @param string   $char_list <p>The characters that end the initial segment.</p>
     * @param int      $offset    [optional] <p>Start position of the inspected part.</p>
     * @param int|null $length    [optional] <p>Length of the inspected part.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask the whole string is the "initial segment"
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // cut out the requested part of the string first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $part = self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $part = \mb_substr($str, $offset, $length);
            } else {
                $part = \mb_substr($str, $offset);
            }

            if ($part === false) {
                return 0;
            }

            $str = $part;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first character of the mask
        $hit = [];
        $found = \preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $hit);
        if (!$found) {
            // no character of the mask found: the whole string counts
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($hit[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
9155
9156
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints(); every value is cast to int, turned
     *       into a numeric HTML entity and decoded via UTF8::html_entity_decode()
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // a single code point is handled like a list with one element
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        $entities = \implode(
            '',
            \array_map(
                static function ($code_point): string {
                    return '&#' . (int) $code_point . ';';
                },
                $code_points
            )
        );

        return self::html_entity_decode($entities, \ENT_QUOTES | \ENT_HTML5);
    }
9189
9190
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * INFO: the string is compared against every entry of the internal BOM
     *       list (BOM bytes => byte length), only its first bytes are inspected
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function string_has_bom(string $str): bool
    {
        // Iterate by value: the list is only read here, and a by-reference
        // loop would leave a dangling reference into the static property.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9215
9216
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    Tags which should not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // optionally clean the input before the tags are removed
        $input = $clean_utf8 ? self::clean($str) : $str;

        // the native function is only called with a tag list if one was given
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9260
9261
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without any whitespace.</p>
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // remove every run of whitespace in one pass
        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

        return (string) $stripped;
    }
9282
9283
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // on PHP >= 8 an empty needle is reported at position 0
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        // before PHP 8 an empty needle is never reported as found
        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($has_mbstring && $encoding === 'UTF-8') {
            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($has_mbstring) {
            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_stripos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // fold the case of both strings, then search case-sensitive
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9372
9373
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // on PHP >= 8 an empty needle is found in an empty haystack
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            // on PHP >= 8 an empty needle matches at the very start of the haystack
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($has_mbstring && $encoding === 'UTF-8') {
            return \mb_stristr($haystack, $needle, $before_needle);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($has_mbstring) {
            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // lazily capture everything in front of the first (case-insensitive) match
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $before = $found[1];
        if ($before_needle) {
            return $before;
        }

        // the result starts right behind the part in front of the needle
        $start = (int) self::strlen($before, $encoding);

        return self::substr($haystack, $start, null, $encoding);
    }
9478
9479
    /**
     * Get the string length, not the byte-length!
     *
     * INFO: use UTF8::strwidth() for the char-length
     *
     * EXAMPLE: <code>UTF8::strlen('Iñtërnâtiônàlizætiøn'); // 20</code>
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        // count the matches of a single-character pattern in unicode mode
        \preg_match_all('/./us', $str, $chars);

        $char_count = \count($chars[0]);

        // no match at all for a non-empty string is reported as failure
        return $char_count === 0 ? false : $char_count;
    }
9609
9610
    /**
     * Get string length in byte.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of bytes in the string.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
9632
9633
    /**
9634
     * Case-insensitive string comparisons using a "natural order" algorithm.
9635
     *
9636
     * INFO: natural order version of UTF8::strcasecmp()
9637
     *
9638
     * EXAMPLES: <code>
9639
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
9640
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9641
     *
9642
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9643
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9644
     * </code>
9645
     *
9646
     * @param string $str1     <p>The first string.</p>
9647
     * @param string $str2     <p>The second string.</p>
9648
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9649
     *
9650
     * @psalm-pure
9651
     *
9652
     * @return int
9653
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9654
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9655
     *             <strong>0</strong> if they are equal
9656
     */
9657
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands with identical options, then hand them over
        // to the case-sensitive natural-order comparison.
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_first, $folded_second);
    }
9664
9665
    /**
9666
     * String comparisons using a "natural order" algorithm
9667
     *
9668
     * INFO: natural order version of UTF8::strcmp()
9669
     *
9670
     * EXAMPLES: <code>
9671
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
9672
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9673
     *
9674
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9675
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9676
     * </code>
9677
     *
9678
     * @see http://php.net/manual/en/function.strnatcmp.php
9679
     *
9680
     * @param string $str1 <p>The first string.</p>
9681
     * @param string $str2 <p>The second string.</p>
9682
     *
9683
     * @psalm-pure
9684
     *
9685
     * @return int
9686
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9687
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
9688
     *             <strong>0</strong> if they are equal
9689
     */
9690
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            // Fold both sides via strtonatfold() before the native natural-order compare.
            $left = (string) self::strtonatfold($str1);
            $right = (string) self::strtonatfold($str2);

            return \strnatcmp($left, $right);
        }

        // Identical strings are equal without any further work.
        return 0;
    }
9701
9702
    /**
9703
     * Case-insensitive string comparison of the first n characters.
9704
     *
9705
     * EXAMPLE: <code>
9706
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
9707
     * </code>
9708
     *
9709
     * @see http://php.net/manual/en/function.strncasecmp.php
9710
     *
9711
     * @param string $str1     <p>The first string.</p>
9712
     * @param string $str2     <p>The second string.</p>
9713
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
9714
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9715
     *
9716
     * @psalm-pure
9717
     *
9718
     * @return int
9719
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9720
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9721
     *             <strong>0</strong> if they are equal
9722
     */
9723
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands with identical options, then compare only
        // the first $len characters via the case-sensitive variant.
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded_first, $folded_second, $len);
    }
9735
9736
    /**
9737
     * String comparison of the first n characters.
9738
     *
9739
     * EXAMPLE: <code>
9740
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
9741
     * </code>
9742
     *
9743
     * @see http://php.net/manual/en/function.strncmp.php
9744
     *
9745
     * @param string $str1     <p>The first string.</p>
9746
     * @param string $str2     <p>The second string.</p>
9747
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
9748
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9749
     *
9750
     * @psalm-pure
9751
     *
9752
     * @return int
9753
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9754
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9755
     *             <strong>0</strong> if they are equal
9756
     */
9757
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // 'UTF-8' and 'CP850' are used as given; anything else is normalized first.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut both strings down to their first $len characters.
        if ($encoding !== 'UTF-8') {
            $head_first = (string) self::substr($str1, 0, $len, $encoding);
            $head_second = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head_first = (string) \mb_substr($str1, 0, $len);
            $head_second = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($head_first, $head_second);
    }
9777
9778
    /**
9779
     * Search a string for any of a set of characters.
9780
     *
9781
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
9782
     *
9783
     * @see http://php.net/manual/en/function.strpbrk.php
9784
     *
9785
     * @param string $haystack  <p>The string where char_list is looked for.</p>
9786
     * @param string $char_list <p>This parameter is case-sensitive.</p>
9787
     *
9788
     * @psalm-pure
9789
     *
9790
     * @return false|string
9791
     *                      <p>The string starting from the character found, or false if it is not found.</p>
9792
     */
9793
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        // Build a pattern from the character list and look for its first hit.
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // Return the haystack from the byte position of the matched text onwards.
        $start = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $start);
    }
9805
9806
    /**
9807
     * Find the position of the first occurrence of a substring in a string.
9808
     *
9809
     * INFO: use UTF8::strpos_in_byte() for the byte-length
9810
     *
9811
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
9812
     *
9813
     * @see http://php.net/manual/en/function.mb-strpos.php
9814
     *
9815
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
9816
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
9817
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
9818
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9819
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9820
     *
9821
     * @psalm-pure
9822
     *
9823
     * @return false|int
9824
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
9825
     *                   string.<br> If needle is not found it returns false.
9826
     */
9827
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Before PHP 8 nothing is ever found in an empty haystack.
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // Only reachable on PHP >= 8, where an empty needle matches at position 0.
            return $needle === '' ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack. Convert it into the
            // equivalent absolute character position first, so that the index returned
            // below stays relative to the start of the original string instead of the
            // start of the truncated tail.
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        $haystack = $haystack_tmp === false ? '' : (string) $haystack_tmp;

        // byte position of the needle inside the already shortened haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos === 0) {
            return $offset;
        }

        // translate the byte position into a character position and re-add the skipped part
        return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
    }
9979
9980
    /**
9981
     * Find the position of the first occurrence of a substring in a string.
9982
     *
9983
     * @param string $haystack <p>
9984
     *                         The string being checked.
9985
     *                         </p>
9986
     * @param string $needle   <p>
9987
     *                         The string to search for in haystack.
9988
     *                         </p>
9989
     * @param int    $offset   [optional] <p>
9990
     *                         The search offset. If it is not specified, 0 is used.
9991
     *                         </p>
9992
     *
9993
     * @psalm-pure
9994
     *
9995
     * @return false|int
9996
     *                   <p>The numeric position of the first occurrence of needle in the
9997
     *                   haystack string. If needle is not found, it returns false.</p>
9998
     */
9999
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or needle never yields a position.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used; 'CP850' is an 8-bit charset.
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
10012
10013
    /**
10014
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10015
     *
10016
     * @param string $haystack <p>
10017
     *                         The string being checked.
10018
     *                         </p>
10019
     * @param string $needle   <p>
10020
     *                         The string to search for in haystack.
10021
     *                         </p>
10022
     * @param int    $offset   [optional] <p>
10023
     *                         The search offset. If it is not specified, 0 is used.
10024
     *                         </p>
10025
     *
10026
     * @psalm-pure
10027
     *
10028
     * @return false|int
10029
     *                   <p>The numeric position of the first occurrence of needle in the
10030
     *                   haystack string. If needle is not found, it returns false.</p>
10031
     */
10032
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or needle never yields a position.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \stripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used; 'CP850' is an 8-bit charset.
        return \mb_stripos($haystack, $needle, $offset, 'CP850');
    }
10045
10046
    /**
10047
     * Find the last occurrence of a character in a string within another.
10048
     *
10049
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
10050
     *
10051
     * @see http://php.net/manual/en/function.mb-strrchr.php
10052
     *
10053
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10054
     * @param string $needle        <p>The string to find in haystack</p>
10055
     * @param bool   $before_needle [optional] <p>
10056
     *                              Determines which portion of haystack
10057
     *                              this function returns.
10058
     *                              If set to true, it returns all of haystack
10059
     *                              from the beginning to the last occurrence of needle.
10060
     *                              If set to false, it returns all of haystack
10061
     *                              from the last occurrence of needle to the end,
10062
     *                              </p>
10063
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10064
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10065
     *
10066
     * @psalm-pure
10067
     *
10068
     * @return false|string
10069
     *                      <p>The portion of haystack or false if needle is not found.</p>
10070
     */
10071
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // 'UTF-8' and 'CP850' are used as given; anything else is normalized first.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // binary || ascii only: the native function is sufficient
        // (it has no "before needle" mode, hence the extra condition)
        //

        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // remaining fallbacks (iconv, then vanilla php): both search for the
        // first character of the needle only
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $first_char, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $first_char, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        // Either the part in front of the hit or the part starting at the hit.
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10176
10177
    /**
10178
     * Reverses characters order in the string.
10179
     *
10180
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
10181
     *
10182
     * @param string $str      <p>The input string.</p>
10183
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10184
     *
10185
     * @psalm-pure
10186
     *
10187
     * @return string
10188
     *                <p>The string with characters in the reverse sequence.</p>
10189
     */
10190
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // The input is passed through emoji_encode() before reversing and the
        // result through emoji_decode() afterwards.
        $encoded = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // Walk from the last character to the first, using the class helpers.
            for ($idx = (int) self::strlen($encoded, $encoding) - 1; $idx >= 0; --$idx) {
                $piece = self::substr($encoded, $idx, 1, $encoding);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($idx = (int) \grapheme_strlen($encoded) - 1; $idx >= 0; --$idx) {
                $piece = \grapheme_substr($encoded, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } else {
            // no intl: fall back to mbstring
            for ($idx = (int) \mb_strlen($encoded) - 1; $idx >= 0; --$idx) {
                $piece = \mb_substr($encoded, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10234
10235
    /**
10236
     * Find the last occurrence of a character in a string within another, case-insensitive.
10237
     *
10238
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
10239
     *
10240
     * @see http://php.net/manual/en/function.mb-strrichr.php
10241
     *
10242
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10243
     * @param string $needle        <p>The string to find in haystack.</p>
10244
     * @param bool   $before_needle [optional] <p>
10245
     *                              Determines which portion of haystack
10246
     *                              this function returns.
10247
     *                              If set to true, it returns all of haystack
10248
     *                              from the beginning to the last occurrence of needle.
10249
     *                              If set to false, it returns all of haystack
10250
     *                              from the last occurrence of needle to the end,
10251
     *                              </p>
10252
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10253
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10254
     *
10255
     * @psalm-pure
10256
     *
10257
     * @return false|string
10258
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
10259
     */
10260
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // 'UTF-8' and 'CP850' are used as given; anything else is normalized first.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php: searches for the first character of the needle only
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        // Either the part in front of the hit or the part starting at the hit.
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10315
10316
    /**
10317
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
10318
     *
10319
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
10320
     *
10321
     * @param string     $haystack   <p>The string to look in.</p>
10322
     * @param int|string $needle     <p>The string to look for.</p>
10323
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
10324
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10325
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10326
     *
10327
     * @psalm-pure
10328
     *
10329
     * @return false|int
10330
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
10331
     *                   string.<br>If needle is not found, it returns false.</p>
10332
     */
10333
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Before PHP 8 nothing is ever found in an empty haystack.
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $is_code_point = (int) $needle === $needle && $needle >= 0;
        if ($is_code_point) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // Only reachable on PHP >= 8, where an empty needle matches at position 0.
            return $needle === '' ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // binary || ascii only: the native function is sufficient
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset
        $can_use_intl = $encoding === 'UTF-8'
            && $offset >= 0
            && self::$SUPPORT['intl'] === true;
        if ($can_use_intl) {
            $intl_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($intl_pos !== false) {
                return $intl_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
10447
10448
    /**
10449
     * Finds position of last occurrence of a string within another, case-insensitive.
10450
     *
10451
     * @param string $haystack <p>
10452
     *                         The string from which to get the position of the last occurrence
10453
     *                         of needle.
10454
     *                         </p>
10455
     * @param string $needle   <p>
10456
     *                         The string to find in haystack.
10457
     *                         </p>
10458
     * @param int    $offset   [optional] <p>
10459
     *                         The position in haystack
10460
     *                         to start searching.
10461
     *                         </p>
10462
     *
10463
     * @psalm-pure
10464
     *
10465
     * @return false|int
10466
     *                   <p>Return the numeric position of the last occurrence of needle in the
10467
     *                   haystack string, or false if needle is not found.</p>
10468
     */
10469
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle never yields a position.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // No function overload active: call the native function directly.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strripos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
10482
10483
    /**
10484
     * Find the position of the last occurrence of a substring in a string.
10485
     *
10486
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
10487
     *
10488
     * @see http://php.net/manual/en/function.mb-strrpos.php
10489
     *
10490
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
10491
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
10492
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
10493
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
10494
     *                               the end of the string.
10495
     *                               </p>
10496
     * @param string     $encoding   [optional] <p>Set the charset.</p>
10497
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10498
     *
10499
     * @psalm-pure
10500
     *
10501
     * @return false|int
10502
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
10503
     *                   string.<br>If needle is not found, it returns false.</p>
10504
     */
10505
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // Early exit for an empty haystack, evaluated before $needle is normalized:
        // older PHP versions always get false, PHP >= 8 gets 0 for an empty needle.
        if ($haystack === '') {
            if (!$is_php8) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // Same check again, now with $needle as a string.
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        // 2) binary || ascii only
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strrpos($haystack, $needle, $offset);
        }

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl: "grapheme_strrpos()" can't handle other encodings or a negative offset
        if ($is_utf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        // 4) ascii only input
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        // 5) vanilla php: cut the haystack according to $offset, search byte-wise,
        //    then convert the byte position into a character position.
        if ($offset !== 0) {
            if ($offset > 0) {
                $cut = self::substr($haystack, $offset);
            } else {
                $cut = self::substr($haystack, 0, $offset);
                // the cut haystack still starts at character 0
                $offset = 0;
            }

            if ($cut !== null) {
                $haystack = $cut === false ? '' : (string) $cut;
            }
        }

        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        // number of characters in front of the match, shifted by the positive offset
        return $offset + (int) self::strlen($prefix);
    }
10642
10643
    /**
10644
     * Find the position of the last occurrence of a substring in a string.
10645
     *
10646
     * @param string $haystack <p>
10647
     *                         The string being checked, for the last occurrence
10648
     *                         of needle.
10649
     *                         </p>
10650
     * @param string $needle   <p>
10651
     *                         The string to find in haystack.
10652
     *                         </p>
10653
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
10654
     *                         the string. Negative values will stop searching at an arbitrary point
10655
     *                         prior to the end of the string.
10656
     *                         </p>
10657
     *
10658
     * @psalm-pure
10659
     *
10660
     * @return false|int
10661
     *                   <p>The numeric position of the last occurrence of needle in the
10662
     *                   haystack string. If needle is not found, it returns false.</p>
10663
     */
10664
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle never yields a position.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // No function overload active: call the native function directly.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strrpos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
10677
10678
    /**
10679
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
10680
     * mask.
10681
     *
10682
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
10683
     *
10684
     * @param string   $str      <p>The input string.</p>
10685
     * @param string   $mask     <p>The mask of chars</p>
10686
     * @param int      $offset   [optional]
10687
     * @param int|null $length   [optional]
10688
     * @param string   $encoding [optional] <p>Set the charset.</p>
10689
     *
10690
     * @psalm-pure
10691
     *
10692
     * @return false|int
10693
     */
10694
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Reduce the input to the requested window before matching.
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // Match the longest leading run of mask characters and measure it.
        $found = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
10725
10726
    /**
10727
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
10728
     *
10729
     * EXAMPLE: <code>
10730
     * $str = 'iñtërnâtiônàlizætiøn';
10731
     * $search = 'nât';
10732
     *
10733
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
10734
     * UTF8::strstr($str, $search, true)); // 'iñtër'
10735
     * </code>
10736
     *
10737
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
10738
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
10739
     * @param bool   $before_needle [optional] <p>
10740
     *                              If <b>TRUE</b>, strstr() returns the part of the
10741
     *                              haystack before the first occurrence of the needle (excluding the needle).
10742
     *                              </p>
10743
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10744
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10745
     *
10746
     * @psalm-pure
10747
     *
10748
     * @return false|string
10749
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
10750
     */
10751
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // Empty haystack: only PHP >= 8 with an empty needle yields a (empty) string.
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Empty needle: the whole haystack on PHP >= 8, false on older versions.
        if ($needle === '') {
            return $is_php8 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) binary || ascii only
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl: "grapheme_strstr()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        // 4) ascii only input
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // 5) vanilla php: lazily capture everything in front of the first needle
        $found = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        return $before_needle
            ? $found[1]
            : self::substr($haystack, (int) self::strlen($found[1]));
    }
10859
10860
    /**
10861
     * Finds first occurrence of a string within another.
10862
     *
10863
     * @param string $haystack      <p>
10864
     *                              The string from which to get the first occurrence
10865
     *                              of needle.
10866
     *                              </p>
10867
     * @param string $needle        <p>
10868
     *                              The string to find in haystack.
10869
     *                              </p>
10870
     * @param bool   $before_needle [optional] <p>
10871
     *                              Determines which portion of haystack
10872
     *                              this function returns.
10873
     *                              If set to true, it returns all of haystack
10874
     *                              from the beginning to the first occurrence of needle.
10875
     *                              If set to false, it returns all of haystack
10876
     *                              from the first occurrence of needle to the end,
10877
     *                              </p>
10878
     *
10879
     * @psalm-pure
10880
     *
10881
     * @return false|string
10882
     *                      <p>The portion of haystack,
10883
     *                      or false if needle is not found.</p>
10884
     */
10885
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // An empty haystack or an empty needle never yields a match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // No function overload active: call the native function directly.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850'); // 8-BIT
    }
10901
10902
    /**
10903
     * Unicode transformation for case-less matching.
10904
     *
10905
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
10906
     *
10907
     * @see http://unicode.org/reports/tr21/tr21-5.html
10908
     *
10909
     * @param string      $str        <p>The input string.</p>
10910
     * @param bool        $full       [optional] <p>
10911
     *                                <b>true</b>, replace full case folding chars (default)<br>
10912
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
10913
     *                                </p>
10914
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10915
     * @param string      $encoding   [optional] <p>Set the charset.</p>
10916
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
10917
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
10918
     *                                is for some languages better ...</p>
10919
     *
10920
     * @psalm-pure
10921
     *
10922
     * @return string
10923
     */
10924
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before any case mapping is applied
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // Without a language and with plain UTF-8 the mbstring call is used directly;
        // everything else is delegated to the strtolower() / strtoupper() methods of this class.
        $use_plain_mb = $lang === null && $encoding === 'UTF-8';

        if ($lower) {
            return $use_plain_mb
                ? \mb_strtolower($str)
                : self::strtolower($str, $encoding, false, $lang);
        }

        return $use_plain_mb
            ? \mb_strtoupper($str)
            : self::strtoupper($str, $encoding, false, $lang);
    }
10958
10959
    /**
10960
     * Make a string lowercase.
10961
     *
10962
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
10963
     *
10964
     * @see http://php.net/manual/en/function.mb-strtolower.php
10965
     *
10966
     * @param string      $str                           <p>The string being lowercased.</p>
10967
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
10968
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
10969
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
10970
     *                                                   tr</p>
10971
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
10972
     *                                                   -> ß</p>
10973
     *
10974
     * @psalm-pure
10975
     *
10976
     * @return string
10977
     *                <p>String with all alphabetic characters converted to lowercase.</p>
10978
     */
10979
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without a special language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // language specific lowercasing via the intl transliterator
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        $is_supported = \in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true);
        if (!$is_supported) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11038
11039
    /**
11040
     * Make a string uppercase.
11041
     *
11042
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
11043
     *
11044
     * @see http://php.net/manual/en/function.mb-strtoupper.php
11045
     *
11046
     * @param string      $str                           <p>The string being uppercased.</p>
11047
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
11048
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11049
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
11050
     *                                                   tr</p>
11051
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
11052
     *                                                   -> ß</p>
11053
     *
11054
     * @psalm-pure
11055
     *
11056
     * @return string
11057
     *                <p>String with all alphabetic characters converted to uppercase.</p>
11058
     */
11059
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without a special language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl IS available in this branch, only the requested language is unknown,
                    // so the warning mirrors the one emitted by strtolower().
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                return (string) \transliterator_transliterate($language_code, $str);
            }

            // the warning now names this method (it previously said "strtolower()")
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11118
11119
    /**
11120
     * Translate characters or replace sub-strings.
11121
     *
11122
     * EXAMPLE:
11123
     * <code>
11124
     * $array = [
11125
     *     'Hello'   => '○●◎',
11126
     *     '中文空白' => 'earth',
11127
     * ];
11128
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
11129
     * </code>
11130
     *
11131
     * @see http://php.net/manual/en/function.strtr.php
11132
     *
11133
     * @param string          $str  <p>The string being translated.</p>
11134
     * @param string|string[] $from <p>The string replacing from.</p>
11135
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
11136
     *
11137
     * @psalm-pure
11138
     *
11139
     * @return string
11140
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
11141
     *                to the corresponding character in "to".</p>
11142
     */
11143
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // identical search and replacement: nothing to do
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // Both sides are turned into lists of characters (or kept as given arrays) ...
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // ... and the longer list is truncated, so that every "from" has exactly one "to".
            $count_from = \count($from);
            $count_to = \count($to);

            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // Keep the result in its own variable: $from must stay intact,
            // otherwise the exception message below would only print "false".
            // NOTE(review): the counts are equalised above, so this guard is presumably defensive only.
            $pairs = \array_combine($from, $to);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            return \strtr($str, $pairs);
        }

        // $to is the empty string from here on
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        // $from is expected to be a "search => replace" map
        return \strtr($str, $from);
    }
11185
11186
    /**
11187
     * Return the width of a string.
11188
     *
11189
     * INFO: use UTF8::strlen() for the byte-length
11190
     *
11191
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn")); // 21</code>
11192
     *
11193
     * @param string $str        <p>The input string.</p>
11194
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11195
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11196
     *
11197
     * @psalm-pure
11198
     *
11199
     * @return int
11200
     */
11201
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        // 2) vanilla php: work on UTF-8, strip the chars matched by the pattern below
        //    and count each of them twice
        if (!$is_utf8) {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        $wide_count = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        return ($wide_count << 1) + (int) self::strlen($remaining);
    }
11245
11246
    /**
     * Get part of a string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * Backends are tried in this order: mbstring, plain "substr()" for the
     * byte-wise encodings ('CP850' / 'ASCII'), intl ("grapheme_substr()"),
     * iconv, plain "substr()" for ASCII-only input and finally a
     * "str_split()" based pure-PHP implementation.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. <b>FALSE</b> is returned if the length of
     *                      <i>str</i> could not be determined (e.g. invalid chars without mbstring).</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // mbstring + UTF-8 (the common case)
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        //
        // mbstring + any other encoding
        //
        // None of the fallbacks below is told about $encoding, so they would
        // slice the input as if it were UTF-8. Let mbstring do it properly.
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($length === null) {
                return \mb_substr($str, $offset, null, $encoding);
            }

            return \mb_substr($str, $offset, $length, $encoding);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // offset points exactly at the end and no explicit length was requested
        // ($length === 0 already returned above, so only null is left to check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        $length = $length ?? $str_length;

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        // &&
        // extract relevant part, and join to make string again
        return \implode('', \array_slice(self::str_split($str), $offset, $length));
    }
11404
11405
    /**
     * Compare a slice of one string against another string.
     *
     * When an offset or a length is given, $str1 is reduced to that slice and
     * $str2 is cut down to the (character) length of the slice before both
     * are compared.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. If omitted, the slice of
     *                                     $str1 runs from the offset to the end of the string.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // slicing is only required if the caller restricted the range
        $needs_slicing = $offset !== 0 || $length !== null;

        if ($needs_slicing && $encoding === 'UTF-8') {
            $str1 = $length === null
                ? (string) \mb_substr($str1, $offset)
                : (string) \mb_substr($str1, $offset, $length);

            $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
        } elseif ($needs_slicing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str1 = (string) self::substr($str1, $offset, $length, $encoding);
            $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1), $encoding);
        }

        return $case_insensitivity
            ? self::strcasecmp($str1, $str2, $encoding)
            : self::strcmp($str1, $str2);
    }
11466
11467
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. A negative value is handed to "mb_substr()" and therefore
     *                             counts from the end of the haystack.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or <b>FALSE</b> if the needle is empty or the
     *                   length of the haystack could not be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // nothing to search in / nothing to search through
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Restrict the haystack whenever a range was requested. A negative
        // $length must be honoured even when $offset is 0 ($length === 0 has
        // already returned above).
        if ($offset || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the non-overlapping literal matches
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
11564
11565
    /**
     * Count the number of substring occurrences, working on bytes.
     *
     * If "mbstring.func_overload" is active, the haystack is cut with
     * "substr()" and counted via "mb_substr_count()" using the 8-bit 'CP850'
     * charset; otherwise the native "substr_count()" is used directly.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string; 0 if the haystack or the needle is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                // output from "substr_count()" have changed in PHP 7.1
                // (PHP_VERSION_ID of 7.1.0 is 70100: major * 10000 + minor * 100 + release)
                \PHP_VERSION_ID < 70100
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
11646
11647
    /**
     * Count how often $substring occurs in $str.
     *
     * The comparison is case-sensitive unless $case_sensitive is set to false.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences; 0 if either string is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // decided before normalization: only a literal 'UTF-8' takes the fast path
        $is_plain_utf8 = $encoding === 'UTF-8';
        if (!$is_plain_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($case_sensitive) {
            if ($is_plain_utf8) {
                return (int) \mb_substr_count($str, $substring);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        if ($is_plain_utf8) {
            // case-insensitive: compare the upper-cased variants
            $str_upper = \mb_strtoupper($str);
            $substring_upper = \mb_strtoupper($substring);

            return (int) \mb_substr_count($str_upper, $substring_upper);
        }

        // case-insensitive with a custom encoding: compare the case-folded variants
        $str_folded = self::strtocasefold($str, true, false, $encoding, null, false);
        $substring_folded = self::strtocasefold($substring, true, false, $encoding, null, false);

        return (int) \mb_substr_count($str_folded, $substring_folded, $encoding);
    }
11694
11695
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // nothing to strip
        if ($needle === '' || !self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many characters as the needle is long
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
11727
11728
    /**
     * Get part of a string, with offset and length measured in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first (byte) position used in str.</p>
     * @param int|null $length [optional] <p>The maximum (byte) length of the returned string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, as returned by "mb_substr()" (func-overload
     *                      active) or "substr()".</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty input or zero length: nothing to return
        if ($str === '' || $length === 0) {
            return '';
        }

        // no offset and no limit: the whole string
        $wants_everything = !$offset && $length === null;
        if ($wants_everything) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // no explicit length: use a large sentinel so "substr()" reads to the end
            return \substr($str, $offset, $length ?? 2147483647);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }
11761
11762
    /**
     * Strip a suffix ($needle) from the end of the string ($haystack), ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it
     *                does not end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // nothing to strip
        if ($needle === '' || !self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the trailing needle
        $haystack_length = (int) self::strlen($haystack);
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystack_length - $needle_length);
    }
11794
11795
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), case-sensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                does not start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // nothing to strip
        if ($needle === '' || !self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many characters as the needle is long
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
11827
11828
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * If $str is an array, the method is applied to every element (using the
     * given $encoding) and an array is returned. NOTE: for array input an
     * omitted $length is treated as 0 per element (the replacement is
     * inserted), whereas for string input it means "up to the end of the string".
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     *
     * @template TSubstrReplace
     * @phpstan-param TSubstrReplace $str
     * @phpstan-return TSubstrReplace
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer offsets fall back to 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // non-integer lengths fall back to the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, element by element; the closure carries $encoding
            // along so that it is not silently reset to the default
            return \array_map(
                static function ($str_single, $replacement_single, $offset_single, $length_single) use ($encoding) {
                    return self::substr_replace(
                        $str_single,
                        $replacement_single,
                        $offset_single,
                        $length_single,
                        $encoding
                    );
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a replacement array for a single string: only its first element is used
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing length into a non-negative one
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace beyond the end of the string
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
11986
11987
    /**
     * Strips $needle from the end of $haystack, if $haystack ends with it.
     *
     * The suffix comparison is done byte-wise and is therefore case-sensitive;
     * when $haystack does not end with $needle it is returned unchanged.
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>$haystack without the trailing $needle, or $haystack itself if it has no such suffix.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        if ($haystack === '') {
            return '';
        }

        // nothing to strip: empty needle, or the haystack does not end with the needle
        if ($needle === '' || \substr($haystack, -\strlen($needle)) !== $needle) {
            return $haystack;
        }

        // fast path: plain "mb_*" calls for the default encoding
        if ($encoding === 'UTF-8') {
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
12036
12037
    /**
     * Returns a case swapped version of the string.
     *
     * For UTF-8 input every character is swapped on its own: a character that
     * equals its lowercase form is upper-cased, any other character is lower-cased.
     * This stays correct when a case mapping changes the byte length of a
     * character (e.g. "ı" (2 bytes) -> "I" (1 byte)), which a byte-wise XOR of the
     * lower-/upper-cased strings cannot handle (the XOR result is cut to the
     * shortest operand and the following bytes are misaligned).
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            // split into single UTF-8 characters; "false" means the input is not valid UTF-8
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);

            if ($chars !== false) {
                $swapped = '';
                foreach ($chars as $char) {
                    $lower = \mb_strtolower($char, 'UTF-8');

                    // already lowercase (or caseless) -> uppercase it, otherwise use the lowercase form
                    $swapped .= $char === $lower ? \mb_strtoupper($char, 'UTF-8') : $lower;
                }

                return $swapped;
            }

            // invalid UTF-8: keep the legacy byte-wise behaviour
            return (string) (\mb_strtolower($str) ^ \mb_strtoupper($str) ^ $str);
        }

        // other encodings: legacy byte-wise swap via XOR of the lower- and upper-cased string
        return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
    }
12069
12070
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when the "mbstring" or "iconv" extension is not
     * loaded, but one of its functions ("mb_strlen()" / "iconv()") exists anyway.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_polyfilled || $iconv_polyfilled;
    }
12097
12098
    /**
     * Replaces every tab character in the string with $tab_length spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with all tabs expanded to spaces.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        return \str_replace("\t", \str_repeat(' ', $tab_length), $str);
    }
12118
12119
    /**
     * Title-cases the string: the first character of each word becomes uppercase,
     * all other characters become lowercase.
     *
     * Without a language and without length preservation the work is done by
     * "mb_convert_case()"; otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // language specific rules or length preservation are handled by "str_titleize()"
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE, self::normalize_encoding($encoding, 'UTF-8'));
    }
12173
12174
    /**
     * Transliterates a string into ASCII (delegates to "ASCII::to_transliterate()").
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The ASCII version of the input.</p>
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = ASCII::to_transliterate($str, $unknown, $strict);

        return $ascii;
    }
12195
12196
    /**
     * Converts a scalar value to a boolean, using the keywords known from
     * "FILTER_VALIDATE_BOOLEAN" plus a numeric / non-empty fallback.
     *
     * Rules (after casting to string, trimming surrounding whitespace and lower-casing):
     * - '' => false
     * - 'true', '1', 'on', 'yes' => true
     * - 'false', '0', 'off', 'no' => false
     * - any other numeric string => true if it is greater than zero, false otherwise
     * - any other non-empty string => true
     *
     * The value is trimmed before the lookup, so padded input such as ' false '
     * or "no\n" is recognised as false instead of falling through to the
     * "non-empty string" rule, and padded numbers are judged the same way on
     * every PHP version.
     *
     * @param bool|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // normalise once: surrounding whitespace and letter case never change the meaning
        $value = \strtolower(\trim((string) $str));

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        if (isset($map[$value])) {
            return $map[$value];
        }

        if (\is_numeric($value)) {
            return ((float) $value) > 0;
        }

        // any other non-empty text counts as true
        return true;
    }
12239
12240
    /**
     * Converts the given string to a safe filename while keeping the string case
     * (delegates to "ASCII::to_filename()").
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
12263
12264
    /**
     * Converts a string (or every element of an array, recursively) into
     * "ISO-8859"-encoding (Latin-1) via "UTF8::utf8_decode()".
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @template TToIso8859
     * @phpstan-param TToIso8859 $str
     * @phpstan-return TToIso8859
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            // convert element by element, keys are kept
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
12296
12297
    /**
     * Converts a string, or each element of an array of strings, to UTF-8 via
     * "UTF8::to_utf8_string()". Existing UTF-8 characters are left alone.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @phpstan-param TToUtf8 $str
     * @phpstan-return TToUtf8
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $converted */
            $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $converted;
        }

        // arrays are converted element by element (one level only), keys are kept
        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        /** @phpstan-var TToUtf8 $str */
        return $str;
    }
12337
12338
    /**
     * Converts a string to UTF-8: byte sequences that already look like UTF-8 are
     * kept, every other non-ASCII byte is passed to "to_utf8_convert_helper()".
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string</p>
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $result = '';

        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $lead = $str[$pos];
            $lead_code = \ord($lead);

            // 7-bit ASCII needs no conversion
            if ($lead_code < 0x80) {
                $result .= $lead;

                continue;
            }

            // 0x80-0xBF: continuation byte without a lead byte
            // 0xF8-0xFF: doesn't look like UTF8, but should be converted
            if ($lead_code < 0xC0 || $lead_code > 0xF7) {
                $result .= self::to_utf8_convert_helper($lead);

                continue;
            }

            // number of continuation bytes the lead byte announces (2-, 3- or 4-byte sequence)
            if ($lead_code <= 0xDF) {
                $tail_length = 1;
            } elseif ($lead_code <= 0xEF) {
                $tail_length = 2;
            } else {
                $tail_length = 3;
            }

            $looks_like_utf8 = true;
            for ($offset = 1; $offset <= $tail_length; ++$offset) {
                // a byte beyond the end of the string counts as 0x00 and therefore fails the check
                $tail_code = $pos + $offset < $byte_count ? \ord($str[$pos + $offset]) : 0x00;

                if ($tail_code < 0x80 || $tail_code > 0xBF) {
                    $looks_like_utf8 = false;

                    break;
                }
            }

            if ($looks_like_utf8) {
                // almost sure it's UTF8 already: copy the whole sequence and skip its continuation bytes
                $result .= \substr($str, $pos, $tail_length + 1);
                $pos += $tail_length;
            } else {
                // not valid UTF8: convert the lead byte only, the next bytes are inspected on their own
                $result .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high_surrogate = (int) \hexdec($matches[1]);
                    $low_surrogate = (int) \hexdec($matches[2]);

                    $code_point = ($high_surrogate << 10)
                                  + $low_surrogate
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    // 0x80-0x9F: build the two-byte sequence by hand
                    $first_byte = (string) self::chr(0xC0 | ($code_point >> 6));
                    $second_byte = (string) self::chr(0x80 | ($code_point & 0x3F));

                    return $first_byte . $second_byte;
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" reports an error with null
        if ($result === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
12469
12470
    /**
     * Casts a numeric string to an integer; a non-numeric string yields null.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>null if the string isn't numeric</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
12488
12489
    /**
     * Casts the input to a string when it is a string, an integer, a float or an
     * object with a "__toString()" method; every other input yields null.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        // "gettype()" reports floats as "double"
        switch (\gettype($input)) {
            case 'string':
            case 'integer':
            case 'double':
                return (string) $input;

            case 'object':
                return \method_exists($input, '__toString') ? (string) $input : null;

            default:
                // null, bool, array, resource, ...
                return null;
        }
    }
12528
12529
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower then "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) cost more time, then we can safe here.
     *
     * An empty $chars list strips nothing (same as the native "trim()"); without
     * that guard the pattern would contain an empty character class.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // no characters to strip: "^[]+|[]+$" would not be a usable pattern
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // "mb_ereg_replace()" takes the pattern without delimiters
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
12574
12575
    /**
     * Makes string's first char uppercase, the rest of the string is left untouched.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with the first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // plain "mb_strtoupper()" is enough without language rules / length preservation
        $plain_mb_upper = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $rest = (string) \mb_substr($str, 1);
            $first = (string) \mb_substr($str, 0, 1);

            if ($plain_mb_upper) {
                return \mb_strtoupper($first) . $rest;
            }

            return self::strtoupper($first, $encoding, false, $lang, $try_to_keep_the_string_length) . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($plain_mb_upper) {
            $first = \mb_strtoupper((string) \mb_substr($str, 0, 1, $encoding), $encoding);
        } else {
            $first = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $first . $rest;
    }
12651
12652
    /**
     * Uppercase for all words in the string.
     *
     * Only the first character of each word is changed, the other characters keep
     * their case ("mb_convert_case($str, MB_CASE_TITLE)" would lowercase them).
     * Words listed in $exceptions are copied unchanged.
     *
     * The string "0" is treated like any other text: the input '0', a word '0'
     * and a $char_list of '0' are all honoured instead of being taken for "empty".
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: "0" is a valid, non-empty input
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the native function is only usable without extra word-chars and without exceptions
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // strict comparison: a word "0" must not be dropped
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
12720
12721
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * Each decoding pass runs "to_utf8()", "html_entity_decode()" and the native
     * "urldecode()"; with $multi_decode the pass is repeated until the string no
     * longer changes.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // always decode once; keep going only in multi-decode mode and while something changed
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $previous !== $str);

        return self::fix_simple_utf8($str);
    }
12780
12781
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences with a code point below 256 become one byte, all other
     * multi-byte sequences are replaced by "?". The conversion is done in place:
     * the write position never overtakes the read position.
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>Return the input unchanged, if the decoded string is not shorter (measured
     *                                with "UTF8::strlen()") than the input.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $original = $str;
        $byte_count = \strlen($str);

        // lazy-load the lookup tables (presumably byte <=> ordinal maps - see "getData()")
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $placeholder = '?';
        $read = 0;
        $write = 0;

        while ($read < $byte_count) {
            // the high nibble of the lead byte tells the length of the sequence
            $high_nibble = $str[$read] & "\xF0";

            if ($high_nibble === "\xC0" || $high_nibble === "\xD0") {
                // 2-byte sequence: 5 payload bits from the lead byte + 6 from the next byte
                $lead_bits = self::$ORD[$str[$read] & "\x1F"];
                $tail_bits = self::$ORD[$str[++$read] & "\x3F"];
                $code_point = ($lead_bits << 6) | $tail_bits;

                $str[$write] = $code_point < 256 ? self::$CHR[$code_point] : $placeholder;
            } elseif ($high_nibble === "\xE0" || $high_nibble === "\xF0") {
                // 3- and 4-byte sequences never fit into one byte: skip their continuation bytes
                $str[$write] = $placeholder;
                $read += $high_nibble === "\xF0" ? 3 : 2;
            } else {
                // single byte: copy as it is
                $str[$write] = $str[$read];
            }

            ++$read;
            ++$write;
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $write);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($original)
        ) {
            return $original;
        }

        return $return;
    }
12853
12854
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The encoded string, or an empty string if the encoder returned false.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // encode exactly once: a second pass would double-encode every non-ASCII byte
        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        if ($encoded === false) {
            return '';
        }

        return $encoded;
    }
12880
12881
    /**
     * Returns the table of UTF-8 whitespace characters kept in self::$WHITESPACE_TABLE.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12896
12897
    /**
     * Cuts a string after a maximum number of words.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The maximum number of words to keep; values below 1 yield an empty string.</p>
     * @param string $str_add_on <p>Appended to the result when the string was actually shortened.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace, then between 1 and $limit "word + trailing whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        $head = $found[0] ?? null;

        // nothing matched: hand the input back untouched
        if ($head === null) {
            return $str;
        }

        // the match already covers the whole string, so nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
12931
12932
    /**
     * Wraps a string to a given number of characters
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * The input is mirrored into a single-byte mask ("?" per character, " " per space,
     * "#" per already existing break), the native wordwrap() is run on that mask, and the
     * resulting "#" positions are then replayed onto the list of real characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string
     *                if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach (\explode($break, $str) as $part_index => $part) {
            // every part after the first was preceded by an existing break
            if ($part_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($part) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;
        $mask_index = -1;
        $break_index = -1;

        /** @noinspection PhpAssignmentInConditionInspection - is ok here */
        while (($break_index = \mb_strpos($mask, '#', $break_index + 1)) !== false) {
            // copy every character located before the current break marker
            for (++$mask_index; $mask_index < $break_index; ++$mask_index) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // the marker replaces an existing break or a space: drop that character
            $next_char = $chars[$char_index];
            if ($next_char === $break || $next_char === ' ') {
                unset($chars[$char_index]);
                ++$char_index;
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_index > $mask_length) {
                break;
            }
        }

        // whatever was not consumed belongs to the last line
        return $wrapped . \implode('', $chars);
    }
13024
13025
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that we wrap the per line.
     *
     * Each piece is wrapped on its own with self::wordwrap() and the pieces are joined
     * again with the delimiter ("\n" when no delimiter was given).
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // without an explicit delimiter, split on any newline flavour
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $suffix;
    }
13078
13079
    /**
     * Returns the Unicode White Space characters kept in self::$WHITESPACE.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
13091
13092
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * When PCRE has UTF-8 support the check is delegated to preg_match(); otherwise a
     * byte-wise state machine validates every sequence.
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary-looking input that is detected as UTF-16 / UTF-32 is rejected
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // With the /u modifier the pattern only matches at all when the
            // subject is valid UTF-8, so matching a single leading character
            // is enough to validate the complete string.
            return \preg_match('/^./us', $str) === 1;
        }

        $remaining = 0; // continuation octets still expected for the current sequence
        $code_point = 0; // code point assembled so far
        $sequence_length = 1; // total number of octets of the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $length = \strlen($str);
        for ($pos = 0; $pos < $length; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($remaining === 0) {
                // Start of a new character: either US-ASCII or the first octet
                // of a multi-octet sequence.
                if (($octet & 0x80) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;
                } elseif (($octet & 0xE0) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $code_point = ($octet & 0x1F) << 6;
                    $remaining = 1;
                    $sequence_length = 2;
                } elseif (($octet & 0xF0) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $code_point = ($octet & 0x0F) << 12;
                    $remaining = 2;
                    $sequence_length = 3;
                } elseif (($octet & 0xF8) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $code_point = ($octet & 0x07) << 18;
                    $remaining = 3;
                    $sequence_length = 4;
                } elseif (($octet & 0xFC) === 0xF8) {
                    // First octet of 5 octet sequence. Such a sequence is always
                    // illegal (not the shortest form, or outside 0-0x10FFFF), but
                    // instead of resynchronizing we read it to the end and let
                    // the checks below reject it.
                    $code_point = ($octet & 0x03) << 24;
                    $remaining = 4;
                    $sequence_length = 5;
                } elseif (($octet & 0xFE) === 0xFC) {
                    // First octet of 6 octet sequence, handled like the 5 octet case.
                    $code_point = ($octet & 1) << 30;
                    $remaining = 5;
                    $sequence_length = 6;
                } else {
                    // Neither US-ASCII nor a legal first octet of a
                    // multi-octet sequence.
                    return false;
                }

                continue;
            }

            // Inside a sequence only a continuation octet (10xxxxxx) is legal;
            // anything else means the sequence is incomplete.
            if (($octet & 0xC0) !== 0x80) {
                return false;
            }

            $code_point |= ($octet & 0x3F) << (($remaining - 1) * 6);

            if (--$remaining !== 0) {
                continue;
            }

            // End of the multi-octet sequence: $code_point is complete, check
            // for illegal sequences and code points.
            if (
                // From Unicode 3.1, non-shortest form is illegal
                ($sequence_length === 2 && $code_point < 0x0080)
                ||
                ($sequence_length === 3 && $code_point < 0x0800)
                ||
                ($sequence_length === 4 && $code_point < 0x10000)
                ||
                ($sequence_length > 4)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($code_point & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                ($code_point > 0x10FFFF)
            ) {
                return false;
            }

            // reset the state for the next character
            $remaining = 0;
            $code_point = 0;
            $sequence_length = 1;
        }

        // a sequence that is still open at the end of the input is invalid
        return $remaining === 0;
    }
13246
13247
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * The common table (self::$COMMON_CASE_FOLD) is always applied; the full table
     * (loaded once from the "caseFolding_full" data file) only on request.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        $common_upper = self::$COMMON_CASE_FOLD['upper'];
        $common_lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $use_lowercase
            ? \str_replace($common_upper, $common_lower, $str)
            : \str_replace($common_lower, $common_upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // lowercase replaces list 0 by list 1, uppercase goes the opposite way
        if ($use_lowercase) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
13300
13301
    /**
     * get data from "/data/*.php"
     *
     * Includes "<this directory>/data/<$file>.php" and returns whatever that file returns.
     *
     * @param string $file <p>The data file name without directory and without ".php".</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
13319
13320
    /**
     * Fills the emoji lookup caches on first use.
     *
     * Loads the "emoji" data file if needed, sorts it by key length (longest key first)
     * and derives the key cache, the value cache and one reversible placeholder per key.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>True when the caches were built by this call, null when they already existed.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function initEmojiData()
    {
        // already initialized by an earlier call
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                // descending by byte length
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
13357
13358
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>True only if the MB_OVERLOAD_STRING constant exists and its bit is set
     *              in the "mbstring.func_overload" INI value.</p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection DeprecatedIniOptionsInspection */
        $overload_flags = (int) @\ini_get('mbstring.func_overload');

        return ($overload_flags & \MB_OVERLOAD_STRING) !== 0;
    }
13378
13379
    /**
     * Filters a list of strings and returns the kept entries as a re-indexed list.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>If true, entries that are empty after trim() are dropped.</p>
     * @param int|null $remove_short_values <p>
     *                                      If not null, entries whose mb_strlen() is less than or equal to
     *                                      this value are dropped.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // the entries are only read, so iterate by value: a by-reference loop
        // would leave a dangling reference to the last element behind
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
13420
13421
    /**
     * Builds a regular-expression fragment that matches any one of the characters of $s.
     *
     * Characters are collected into one bracket expression ("[...]"); chunks returned by
     * self::str_split() that are longer than one character become separate alternatives,
     * in which case the result has the form "(?:[...]|alternative)".
     * Results are cached per "$s" / "$class" combination.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial (already regex-safe) content of the bracket expression.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 holds the content of the bracket expression, any further
        // entry is a stand-alone alternative
        $class_array = [$class];

        // iterate by value and with its own variable: the split result is a
        // temporary and the parameter $s must not be overwritten
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal "-" has to be the first character inside "[...]"
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: escape it for use inside the pattern
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // a single character of three or more bytes is taken over as it is
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
13476
13477
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split by $delimiter and every part is passed through self::ucfirst(),
     * except parts that
     * - are exactly one of the special names below (e.g. "von", "de"),
     * - start with one of the special prefixes below (e.g. "mc", "d'"), or
     * - start with one of the special names while the delimiter is "-".
     * The parts are joined with $delimiter again afterwards.
     *
     * @param string $names     <p>The name(s) to fix.</p>
     * @param string $delimiter <p>The separator used to split and to re-join the parts.</p>
     * @param string $encoding  <p>Passed through to self::ucfirst().</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // init
        $name_helper_array = \explode($delimiter, $names);
        if ($name_helper_array === false) {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        foreach ($name_helper_array as &$name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $continue = false;

            // the lookup lists are only read, so they are iterated by value
            if ($delimiter === '-') {
                foreach ($special_cases['names'] as $beginning) {
                    if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                        $continue = true;

                        break;
                    }
                }
            }

            foreach ($special_cases['prefixes'] as $beginning) {
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    $continue = true;

                    break;
                }
            }

            if ($continue) {
                continue;
            }

            $name = self::ucfirst($name, $encoding);
        }
        // drop the reference so that $name no longer aliases the last element
        unset($name);

        return \implode($delimiter, $name_helper_array);
    }
13577
13578
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * Normalizes the string to NFD and then removes every run of non-spacing marks (\p{Mn}).
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>An empty string if the normalization fails; otherwise the result of preg_replace().</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
        if ($decomposed === false) {
            return '';
        }

        $marks_pattern = '/\p{Mn}+/u';

        return \preg_replace($marks_pattern, '', $decomposed);
    }
13602
13603
    /**
     * Converts a single byte to its UTF-8 representation.
     *
     * Bytes listed in the Windows-1252 special-case table are taken from that table;
     * every other byte is encoded as a 2-byte UTF-8 sequence.
     *
     * @param int|string $input <p>The byte to convert (used as key into self::$ORD).</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Lead byte: the top two bits of the value, tagged with 110xxxxx.
            // An integer shift is used instead of "/ 64", because the division
            // yields a fractional float whose implicit truncation as array key
            // is deprecated since PHP 8.1.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
            // Continuation byte: the low six bits, tagged with 10xxxxxx.
            // (string-wise bitwise operators are intended here)
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
13641
13642
    /**
     * Rewrites "%uXXXX" escapes (3 or 4 hex digits) as HTML hex entities ("&#xXXXX;").
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input with every matching escape replaced; unchanged if there is none.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        // cheap pre-check before any regex work
        if (\strpos($str, '%u') === false) {
            return $str;
        }

        $pattern = '/%u([0-9a-fA-F]{3,4})/';

        return \preg_match($pattern, $str)
            ? (string) \preg_replace($pattern, '&#x\\1;', $str)
            : $str;
    }
13664
}
13665