Passed
Push — master ( dbf068...b6d151 )
by Lars
23:32 queued 20:33
created

UTF8   F

Complexity

Total Complexity 1752

Size/Duplication

Total Lines 13711
Duplicated Lines 0 %

Test Coverage

Coverage 79.81%

Importance

Changes 109
Bugs 53 Features 5
Metric Value
eloc 4232
dl 0
loc 13711
ccs 2968
cts 3719
cp 0.7981
rs 0.8
c 109
b 53
f 5
wmc 1752

273 Methods

Rating   Name   Duplication   Size   Complexity  
A str_iends_with() 0 11 3
A str_iends_with_any() 0 13 4
A is_serialized() 0 11 3
A regex_replace() 0 20 3
A encode_mimeheader() 0 26 5
F extract_text() 0 175 34
B chr_to_decimal() 0 38 8
A add_bom_to_string() 0 7 2
A ctype_loaded() 0 3 1
D chr() 0 107 19
A chunk_split() 0 3 1
A css_identifier() 0 55 6
A css_stripe_media_queries() 0 6 1
A clean() 0 47 6
A __construct() 0 2 1
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A char_at() 0 7 2
A chars() 0 4 1
A chr_size_list() 0 17 3
A checkForSupport() 0 46 4
A collapse_whitespace() 0 7 2
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A array_change_key_case() 0 23 5
A count_chars() 0 11 1
B between() 0 48 8
A emoji_decode() 0 21 3
A decode_mimeheader() 0 8 3
A emoji_encode() 0 21 3
A decimal_to_chr() 0 5 1
F encode() 0 144 37
A chr_to_hex() 0 11 3
A emoji_from_country_code() 0 17 3
A get_unique_string() 0 21 3
A file_has_bom() 0 8 2
A filter_input() 0 16 3
A is_bom() 0 10 3
A is_hexadecimal() 0 7 2
A has_uppercase() 0 7 2
A is_utf8() 0 13 4
A html_escape() 0 6 1
B get_file_type() 0 60 7
D is_utf16() 0 76 18
C filter() 0 59 14
A is_html() 0 14 2
A is_alpha() 0 7 2
B get_random_string() 0 54 10
A fix_utf8() 0 30 4
A first_char() 0 14 4
A is_uppercase() 0 7 2
A is_ascii() 0 3 1
A is_blank() 0 7 2
D getCharDirection() 0 104 117
A htmlspecialchars() 0 15 3
A filter_var_array() 0 15 2
A has_whitespace() 0 7 2
B is_binary() 0 39 10
A intlChar_loaded() 0 3 1
A lcfirst() 0 44 5
B is_url() 0 40 7
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
A json_loaded() 0 3 1
A is_lowercase() 0 7 2
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A filter_var() 0 15 2
A is_empty() 0 3 1
D is_utf32() 0 76 18
A is_alphanumeric() 0 7 2
A json_decode() 0 17 3
A fix_simple_utf8() 0 32 5
B is_json() 0 26 8
A is_printable() 0 3 1
A int_to_hex() 0 7 2
A has_lowercase() 0 7 2
A json_encode() 0 13 3
A is_base64() 0 17 5
A hex_to_int() 0 14 3
A hex_to_chr() 0 4 1
A htmlentities() 0 28 3
A filter_input_array() 0 15 3
A getSupportInfo() 0 13 3
A is_punctuation() 0 3 1
C html_entity_decode() 0 58 13
C file_get_contents() 0 60 12
B html_encode() 0 54 11
A str_substr_after_first_separator() 0 28 6
A max() 0 14 3
B str_camelize() 0 74 10
A parse_str() 0 18 4
A str_contains() 0 15 3
B str_to_lines() 0 28 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 25 6
A remove_left() 0 28 4
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A max_chr_width() 0 8 2
C utf8_decode() 0 59 13
A ltrim() 0 26 5
A levenshtein() 0 10 1
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 70 14
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A string() 0 18 4
B str_obfuscate() 0 47 8
D normalize_encoding() 0 147 16
B rxClass() 0 44 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 170 7
A normalize_whitespace() 0 11 1
A str_starts_with() 0 16 4
A str_humanize() 0 15 1
C substr_count_in_byte() 0 55 15
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 26 5
C str_longest_common_substring() 0 76 16
A titlecase() 0 35 5
A getData() 0 6 1
B strtolower() 0 58 10
A urldecode() 0 35 4
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 123 27
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A str_split_array() 0 17 2
A ws() 0 3 1
A str_replace_first() 0 20 2
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A to_boolean() 0 35 5
C stristr() 0 79 17
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A trim() 0 26 5
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 12 3
A str_ireplace() 0 31 5
A str_replace_ending() 0 24 6
A string_has_bom() 0 9 3
B strtr() 0 41 11
B str_contains_all() 0 22 9
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 71 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A rawurldecode() 0 35 4
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 14 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 14
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A str_replace() 0 18 1
A substr_iright() 0 15 4
A replace() 0 11 2
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 134 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
F strrpos() 0 136 31
A remove_right() 0 25 4
A remove_html_breaks() 0 3 1
A showSupport() 0 16 3
A remove_invisible_characters() 0 11 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 43 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 16 4
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 12 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
B str_snakeize() 0 57 6
A str_sort() 0 16 3
A to_utf8() 0 15 3
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 51 11
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
C ord() 0 68 16
B to_string() 0 27 8
A strtonatfold() 0 11 2
C strcspn() 0 48 12
A fixStrCaseHelper() 0 41 5
C str_split_pattern() 0 54 13
D strstr() 0 107 21
A str_isubstr_first() 0 25 4
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 137 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A replace_diamond_question_mark() 0 43 5
D is_utf8_string() 0 133 28
B str_delimit() 0 31 8
A to_utf8_convert_helper() 0 28 5
B strtoupper() 0 58 10
A min() 0 14 3
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 28 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 151 33
A str_shuffle() 0 33 6
A strcmp() 0 11 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
     * Bom => Byte-Length
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * Each key is a byte order mark (either the raw bytes, or the same bytes
     * after they were mis-read as "WINDOWS-1252" and re-encoded as UTF-8) and
     * each value is the number of bytes that key occupies.
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // The three characters U+00EF U+00BB U+00BF ("ï»¿") encoded as UTF-8; written as
        // escape sequences because the literal is easily lost by editors and tools.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
31
32
    /**
33
     * Numeric code point => UTF-8 Character
34
     *
35
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
36
     *
37
     * @var array<int, string>
38
     */
39
    private static $WHITESPACE = [
40
        // NULL Byte
41
        0 => "\x0",
42
        // Tab
43
        9 => "\x9",
44
        // New Line
45
        10 => "\xa",
46
        // Vertical Tab
47
        11 => "\xb",
48
        // Carriage Return
49
        13 => "\xd",
50
        // Ordinary Space
51
        32 => "\x20",
52
        // NO-BREAK SPACE
53
        160 => "\xc2\xa0",
54
        // OGHAM SPACE MARK
55
        5760 => "\xe1\x9a\x80",
56
        // MONGOLIAN VOWEL SEPARATOR
57
        6158 => "\xe1\xa0\x8e",
58
        // EN QUAD
59
        8192 => "\xe2\x80\x80",
60
        // EM QUAD
61
        8193 => "\xe2\x80\x81",
62
        // EN SPACE
63
        8194 => "\xe2\x80\x82",
64
        // EM SPACE
65
        8195 => "\xe2\x80\x83",
66
        // THREE-PER-EM SPACE
67
        8196 => "\xe2\x80\x84",
68
        // FOUR-PER-EM SPACE
69
        8197 => "\xe2\x80\x85",
70
        // SIX-PER-EM SPACE
71
        8198 => "\xe2\x80\x86",
72
        // FIGURE SPACE
73
        8199 => "\xe2\x80\x87",
74
        // PUNCTUATION SPACE
75
        8200 => "\xe2\x80\x88",
76
        // THIN SPACE
77
        8201 => "\xe2\x80\x89",
78
        // HAIR SPACE
79
        8202 => "\xe2\x80\x8a",
80
        // LINE SEPARATOR
81
        8232 => "\xe2\x80\xa8",
82
        // PARAGRAPH SEPARATOR
83
        8233 => "\xe2\x80\xa9",
84
        // NARROW NO-BREAK SPACE
85
        8239 => "\xe2\x80\xaf",
86
        // MEDIUM MATHEMATICAL SPACE
87
        8287 => "\xe2\x81\x9f",
88
        // HALFWIDTH HANGUL FILLER
89
        65440 => "\xef\xbe\xa0",
90
        // IDEOGRAPHIC SPACE
91
        12288 => "\xe3\x80\x80",
92
    ];
93
94
    /**
95
     * @var array<string, string>
96
     */
97
    private static $WHITESPACE_TABLE = [
98
        'SPACE'                     => "\x20",
99
        'NO-BREAK SPACE'            => "\xc2\xa0",
100
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
101
        'EN QUAD'                   => "\xe2\x80\x80",
102
        'EM QUAD'                   => "\xe2\x80\x81",
103
        'EN SPACE'                  => "\xe2\x80\x82",
104
        'EM SPACE'                  => "\xe2\x80\x83",
105
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
106
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
107
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
108
        'FIGURE SPACE'              => "\xe2\x80\x87",
109
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
110
        'THIN SPACE'                => "\xe2\x80\x89",
111
        'HAIR SPACE'                => "\xe2\x80\x8a",
112
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
113
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
114
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
115
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
116
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
117
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
118
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
119
    ];
120
121
    /**
122
     * @var array
123
     *
124
     * @phpstan-var array{upper: string[], lower: string[]}
125
     */
126
    private static $COMMON_CASE_FOLD = [
127
        'upper' => [
128
            'µ',
129
            'ſ',
130
            "\xCD\x85",
131
            'ς',
132
            'ẞ',
133
            "\xCF\x90",
134
            "\xCF\x91",
135
            "\xCF\x95",
136
            "\xCF\x96",
137
            "\xCF\xB0",
138
            "\xCF\xB1",
139
            "\xCF\xB5",
140
            "\xE1\xBA\x9B",
141
            "\xE1\xBE\xBE",
142
        ],
143
        'lower' => [
144
            'μ',
145
            's',
146
            'ι',
147
            'σ',
148
            'ß',
149
            'β',
150
            'θ',
151
            'φ',
152
            'π',
153
            'κ',
154
            'ρ',
155
            'ε',
156
            "\xE1\xB9\xA1",
157
            'ι',
158
        ],
159
    ];
160
161
    /**
162
     * @var array
163
     *
164
     * @phpstan-var array<string, mixed>
165
     */
166
    private static $SUPPORT = [];
167
168
    /**
169
     * @var string[]|null
170
     *
171
     * @phpstan-var array<string, string>|null
172
     */
173
    private static $BROKEN_UTF8_FIX;
174
175
    /**
176
     * @var string[]|null
177
     *
178
     * @phpstan-var array<int, string>|null
179
     */
180
    private static $WIN1252_TO_UTF8;
181
182
    /**
183
     * @var string[]|null
184
     *
185
     * @phpstan-var array<int ,string>|null
186
     */
187
    private static $INTL_TRANSLITERATOR_LIST;
188
189
    /**
190
     * @var string[]|null
191
     *
192
     * @phpstan-var array<string>|null
193
     */
194
    private static $ENCODINGS;
195
196
    /**
197
     * @var int[]|null
198
     *
199
     * @phpstan-var array<string ,int>|null
200
     */
201
    private static $ORD;
202
203
    /**
204
     * @var string[]|null
205
     *
206
     * @phpstan-var array<string, string>|null
207
     */
208
    private static $EMOJI;
209
210
    /**
211
     * @var string[]|null
212
     *
213
     * @phpstan-var array<string>|null
214
     */
215
    private static $EMOJI_VALUES_CACHE;
216
217
    /**
218
     * @var string[]|null
219
     *
220
     * @phpstan-var array<string>|null
221
     */
222
    private static $EMOJI_KEYS_CACHE;
223
224
    /**
225
     * @var string[]|null
226
     *
227
     * @phpstan-var array<string>|null
228
     */
229
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
230
231
    /**
232
     * @var string[]|null
233
     *
234
     * @phpstan-var array<int, string>|null
235
     */
236
    private static $CHR;
237
238
    /**
     * Creates an instance of the helper.
     *
     * The constructor takes no arguments and performs no initialisation.
     */
    public function __construct()
    {
        // intentionally empty
    }
244
245
    /**
     * Fetch the single character located at a given position, similar to $str[1]
     * but counted in characters instead of bytes.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>Zero-based character position; negative values yield an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character, or an empty string for empty input / negative position.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $is_addressable = $str !== '' && $pos >= 0;
        if (!$is_addressable) {
            return '';
        }

        // "mb_substr" is called directly for UTF-8, every other charset goes through self::substr()
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
271
272
    /**
     * Make sure the string starts with a byte order mark.
     *
     * INFO: A string for which UTF8::string_has_bom() reports a BOM is returned unchanged,
     *       otherwise the result of UTF8::bom() is prepended.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The output string that contains BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str)) {
            // nothing to do, a BOM is already present
            return $str;
        }

        return self::bom() . $str;
    }
294
295
    /**
     * Changes the case of all keys in an array.
     *
     * The values are copied unchanged; only the keys are passed through
     * UTF8::strtolower() or UTF8::strtoupper(). If two keys become equal after
     * the conversion, the later entry overwrites the earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default). Any other value
     *                                       is treated as <strong>CASE_LOWER</strong>.</p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>
     *                              <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // everything that is not explicitly CASE_UPPER falls back to lowercase
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // iterate by value: the values are only copied, so a reference (and the
        // dangling reference it would leave behind after the loop) is not needed
        foreach ($array as $key => $value) {
            // PHP stores numeric string keys as integers, so cast before the string helpers see them
            $key = (string) $key;

            $key = $to_upper
                ? self::strtoupper($key, $encoding)
                : self::strtolower($key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
332
333
    /**
     * Extract the text located between two delimiters.
     *
     * The first occurrence of $start at or after $offset is located, then the first
     * occurrence of $end after that delimiter. An empty string is returned when either
     * delimiter is missing or when nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // decided once, before normalisation: plain "UTF-8" uses the "mb_" functions directly
        $use_mb = $encoding === 'UTF-8';
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $start_position = $use_mb
            ? \mb_strpos($str, $start, $offset)
            : self::strpos($str, $start, $offset, $encoding);
        if ($start_position === false) {
            return '';
        }

        // first character after the start delimiter
        $start_length = $use_mb
            ? \mb_strlen($start)
            : self::strlen($start, $encoding);
        $substr_index = $start_position + (int) $start_length;

        $end_position = $use_mb
            ? \mb_strpos($str, $end, $substr_index)
            : self::strpos($str, $end, $substr_index, $encoding);
        if ($end_position === false || $end_position === $substr_index) {
            // no end delimiter, or nothing between the two delimiters
            return '';
        }

        $length = $end_position - $substr_index;

        if ($use_mb) {
            return (string) \mb_substr($str, $substr_index, $length);
        }

        return (string) self::substr($str, $substr_index, $length, $encoding);
    }
398
399
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * The bit string is read as one big-endian number: leading zero bits are
     * dropped, the remainder is left-padded to a whole number of bytes and every
     * group of 8 bits becomes one byte of the result. Working on 8-bit groups
     * (instead of converting the whole value through a float-backed number)
     * keeps long inputs exact and keeps bytes below 0x10 intact.
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded bytes, or an empty string for empty / all-zero / non-string input.</p>
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // ignore everything that is not a binary digit and drop leading zeros
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to full bytes, e.g. '1010' => '00001010'
        $missing_bits = (8 - (\strlen($bits) % 8)) % 8;
        $bits = \str_repeat('0', $missing_bits) . $bits;

        $str = '';
        foreach (\str_split($bits, 8) as $byte_bits) {
            $str .= \chr((int) \bindec($byte_bits));
        }

        return $str;
    }
425
426
    /**
     * Provide the byte order mark used for UTF-8.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark (three bytes).</p>
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
442
443
    /**
     * Forwards to UTF8::chr_map() with the same arguments.
     *
     * @alias of UTF8::chr_map()
     *
     * @param callable $callback
     * @param string   $str
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
459
460
    /**
     * Fetch one character by its zero-based index.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // "mb_substr" is called directly for UTF-8, other charsets are delegated to self::substr()
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
480
481
    /**
     * Split the string into its characters.
     *
     * The work is delegated to UTF8::str_split() with its default arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
496
497
    /**
     * Detect once which UTF-8 related extensions / features the server offers
     * and remember the result in self::$SUPPORT.
     *
     * @return true|null
     *                   <p><strong>true</strong> when the detection ran in this call,
     *                   <strong>null</strong> when it had already been done before.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // guard: the detection already happened
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();

        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // switch "mbstring" (and its regex functions) to UTF-8
            \mb_internal_encoding('UTF-8');
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            // the polyfill provides "mb_internal_encoding" as well
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
551
552
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoised per "code point + encoding" in a function-local static cache.
     * ASCII code points (U+0001 - U+007F) are read from the byte table, everything else is
     * built via "IntlChar" when available, otherwise by assembling the UTF-8 bytes by hand.
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null on failure or empty input
     *                     (non-integer input, code point <= 0 or code point above U+10FFFF).</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // the parameter is untyped, so non-integers can still arrive here;
        // U+10FFFF is the last code point that UTF-8 can represent
        if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // only for "simple"-chars: the table holds single bytes, and a single byte is
        // only a complete UTF-8 character up to 0x7F (0x80 is a continuation byte)
        if ($code_point <= 0x7F) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $chr = \IntlChar::chr($code_point);
            if ($chr === null) {
                // "IntlChar" could not build the character: report failure instead of encoding null
                return null;
            }

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
675
676
    /**
     * Run a callback on every character of a string and collect the results.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function, it receives one character per call.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        // the characters come from UTF8::str_split()
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
696
697
    /**
     * Build a list with the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An array of byte lengths of each character, empty for empty input.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // pick the byte counter first, then apply it to every character
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            $byte_length = static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_length = '\strlen';
        }

        return \array_map($byte_length, self::str_split($str));
    }
732
733
    /**
     * Get a decimal code representation of a specific character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * With "iconv" available the character is converted to UCS-4LE and read as a
     * 32-bit integer; otherwise the UTF-8 bytes are decoded by hand.
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * @param string $char <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point of the character, 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // an empty string has no first byte to decode
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                // 'V' = unsigned 32-bit little-endian; only the first 4 bytes are read
                $unpacked = \unpack('V', $chr_tmp);
                if (isset($unpacked[1])) {
                    return $unpacked[1];
                }
            }
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // the lead byte tells how many bytes belong to the character;
        // its marker bits are removed so only payload bits remain
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
785
786
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character; it is cast to a string before it is evaluated.</p>
     * @param string     $prefix [optional] <p>Forwarded to UTF8::int_to_hex().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // The "&#0;" entity is handed over to ord() as an empty string.
        $input = $char === '&#0;' ? '' : (string) $char;
        $code_point = self::ord($input);

        return self::int_to_hex($code_point, $prefix);
    }
811
812
    /**
     * Splits a string into smaller chunks and joins them with the given line ending.
     *
     * The separator is only placed between the chunks, it is not appended after the last one.
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
830
831
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The steps run in a fixed order: strip invalid bytes, replace the diamond question mark,
     * remove invisible characters, normalize whitespace, normalize MS Word chars, remove the BOM.
     *
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
     *                                                        Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
     *                                                        in combination with $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
     *                                                        question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
     *                                                        invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Forwarded to
     *                                                        UTF8::remove_invisible_characters(); set to true, if
     *                                                        url encoded invisible characters e.g.: "%0B" should be
     *                                                        handled as well.<br> WARNING: maybe contains
     *                                                        false-positives e.g. aa%0Baa -> aaaa.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 encoded string.</p>
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences, every other matched byte is dropped
        // because only "$1" is written back.
        $utf8_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($utf8_pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        return $remove_bom ? self::remove_bom($cleaned) : $cleaned;
    }
910
911
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string; the value is cast to a string first.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $repaired = self::fix_simple_utf8($input);

        return self::clean(
            $repaired,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // (no MS Word normalization)
            true,  // ... but keep non-breaking-spaces
            true   // remove diamond question mark (�)
            // invisible characters (e.g. "\0") are removed by the default of clean()
        );
    }
948
949
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
     *                                     default, code points will be returned as integers.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        An empty array for an empty or unsupported input.
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A string is evaluated character by character.
        if (\is_string($arg)) {
            $arg = self::str_split($arg);
        }

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($arg) || $arg === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $arg);
        if (!$use_u_style) {
            return $code_points;
        }

        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1010
1011
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        // Prefer the mbstring regex engine, otherwise fall back to the class helper.
        if (self::$SUPPORT['mbstring'] === true) {
            $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);
        } else {
            $collapsed = self::regex_replace($str, '[[:space:]]+', ' ');
        }

        return \trim($collapsed);
    }
1031
1032
    /**
     * Returns count of characters used in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Forwarded to UTF8::str_split(); set to false, if you
     *                                        don't want to use "mb_" functions there.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // One array element per character ...
        $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // ... which is then tallied per distinct character.
        return \array_count_values($characters);
    }
1061
1062
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter     <p>Map of "search => replacement" strings that is applied before invalid characters are stripped.</p>
     * @param bool     $strip_tags <p>Run strip_tags() on the input.</p>
     * @param bool     $strtolower <p>Run strtolower() on the input.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @phpstan-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: without a usable input a unique identifier is generated.
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $protected_underscores = 0;
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $protected_underscores);
        }

        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Replace temporary placeholder '##' with '__' only if the original
        // identifier contained '__'.
        if ($protected_underscores > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Characters kept by the pattern below:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 - U+FFFF
        // Any character not in the above list is stripped out.
        $valid_characters = '/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u';
        $str = (string) \preg_replace($valid_characters, '', $str);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $invalid_starts = ['/^[0-9]/', '/^(-[0-9])|^(--)/'];
        $str = (string) \preg_replace($invalid_starts, ['_', '__'], $str);

        return \trim($str, '-');
    }
1136
1137
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the pattern is cut out of the given CSS.
     *
     * @param string $str <p>The CSS source.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The CSS without the matched media-query blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';
        $stripped = \preg_replace($media_query_pattern, '', $str);

        return (string) $stripped;
    }
1154
1155
    /**
     * Checks whether ctype is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function ctype_loaded(): bool
    {
        $ctype_is_available = \extension_loaded('ctype');

        return $ctype_is_available;
    }
1169
1170
    /**
     * Converts an int value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_decimal()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int <p>The decimal code point.</p>
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 character, or an empty string if the conversion does not yield a string.</p>
     */
    public static function decimal_to_chr($int): string
    {
        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        //
        // NOTE(review): PHP >= 8.2 deprecates the "HTML-ENTITIES" mbstring encoding -
        // confirm the supported PHP range before replacing this call.
        $char = \mb_convert_encoding('&#' . $int . ';', 'UTF-8', 'HTML-ENTITIES');

        // mb_convert_encoding() is declared as "array|string|false", so anything
        // but a string must not reach the "string" return type.
        return \is_string($char) ? $char : '';
    }
1191
1192
    /**
     * Decodes a MIME header field.
     *
     * @param string $str      <p>The encoded MIME header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are used as they are, every other name is normalized first.
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1213
1214
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * The input is matched case-insensitively; anything that is not exactly two
     * ASCII letters is treated as an error.
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
     *
     * @return string
     *                <p>Emoji or empty string on error.</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only the letters A-Z have a regional indicator symbol. Validating the bytes up front
        // also keeps the byte offsets 0 and 1 below from landing inside a multi-byte character.
        if (\preg_match('/^[A-Za-z]{2}$/D', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $flagOffset = 0x1F1E6; // REGIONAL INDICATOR SYMBOL LETTER A
        $asciiOffset = 0x41; // "A"

        $emoji = '';
        foreach ([0, 1] as $index) {
            // Clearing bit 0x20 folds an ASCII lower-case letter onto its upper-case
            // counterpart, independent of the current locale.
            $letter = \ord($country_code_iso_3166_1[$index]) & 0xDF;

            $emoji .= self::chr($letter - $asciiOffset + $flagOffset) ?? '';
        }

        return $emoji;
    }
1242
1243
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the placeholder set that was used for encoding ...
        $placeholders = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        // ... and swap every placeholder for its emoji.
        return (string) \str_replace($placeholders, (array) self::$EMOJI_VALUES_CACHE, $str);
    }
1286
1287
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the placeholder set the emoji are replaced with ...
        $placeholders = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        // ... and swap every emoji for its placeholder.
        return (string) \str_replace((array) self::$EMOJI_VALUES_CACHE, $placeholders, $str);
    }
1330
1331
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the current encoding is detected via
     *                                              UTF8::str_detect_encoding() and a detected encoding replaces
     *                                              $from_encoding; if nothing is detected, the result of
     *                                              UTF8::to_utf8() is returned.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $to_encoding is "JSON" and the string can not be JSON-encoded
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // "UTF-8" and "CP850" are used as they are, every other name is normalized first.
        if (!\in_array($to_encoding, ['UTF-8', 'CP850'], true)) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && !\in_array($from_encoding, ['UTF-8', 'CP850'], true)) {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Source and target are known and equal: nothing to do.
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // --- pseudo encodings: JSON, BASE64, HTML-ENTITIES ---

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // --- detection of the source encoding ---

        $detected_encoding = ($auto_detect_the_from_encoding || !$from_encoding)
            ? self::str_detect_encoding($str)
            : false;

        if ($detected_encoding !== false) {
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // --- conversions handled by the class itself ---

        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // --- generic conversion via mbstring, then iconv ---

        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $str_encoded = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // A falsy result is not returned here, the iconv attempt below takes over.
            if ($str_encoded) {
                \assert(\is_string($str_encoded));

                return $str_encoded;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $converted = @\iconv($from_encoding, $to_encoding, $str);

        // Last resort: hand back the (possibly decoded) input.
        return $converted !== false ? $converted : $str;
    }
1506
1507
    /**
     * Encodes a string as a MIME header field (with an empty field name).
     *
     * @param string $str               <p>The header value to encode.</p>
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
     * @param int    $indent            [optional] <p>Set the max length indent.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as they are, every other name is normalized first.
        if (!\in_array($from_charset, ['UTF-8', 'CP850'], true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, ['UTF-8', 'CP850'], true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $preferences);
    }
1550
1551
    /**
1552
     * Create an extract from a sentence; if the search string is found, the extract tries to center it in the output.
1553
     *
1554
     * @param string   $str                       <p>The input string.</p>
1555
     * @param string   $search                    <p>The searched string.</p>
1556
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
1557
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
1558
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
1559
     *
1560
     * @psalm-pure
1561
     *
1562
     * @return string
1563
     */
1564 1
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Characters stripped from the cut edges of the extract.
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // Bind the string primitives once: native mb_* calls for UTF-8,
        // the class' own encoding-aware wrappers for everything else.
        if ($encoding === 'UTF-8') {
            $len = static function (string $haystack) {
                return \mb_strlen($haystack);
            };
            $find = static function (string $haystack, string $needle, int $from) {
                return \mb_strpos($haystack, $needle, $from);
            };
            $find_last = static function (string $haystack, string $needle) {
                return \mb_strrpos($haystack, $needle);
            };
            $find_ci = static function (string $haystack, string $needle) {
                return \mb_stripos($haystack, $needle);
            };
            $cut = static function (string $haystack, int $start, int $count) {
                return \mb_substr($haystack, $start, $count);
            };
        } else {
            $len = static function (string $haystack) use ($encoding) {
                return self::strlen($haystack, $encoding);
            };
            $find = static function (string $haystack, string $needle, int $from) use ($encoding) {
                return self::strpos($haystack, $needle, $from, $encoding);
            };
            $find_last = static function (string $haystack, string $needle) use ($encoding) {
                return self::strrpos($haystack, $needle, 0, $encoding);
            };
            $find_ci = static function (string $haystack, string $needle) use ($encoding) {
                return self::stripos($haystack, $needle, 0, $encoding);
            };
            $cut = static function (string $haystack, int $start, int $count) use ($encoding) {
                return self::substr($haystack, $start, $count, $encoding);
            };
        }

        // Smaller of the next ' ' and next '.' lookups at/after $from, cast to int.
        // NOTE(review): when either lookup fails the cast result appears to become 0,
        // which callers below treat as "no boundary" - confirm this is intended.
        $next_boundary = static function (int $from) use ($find, $str): int {
            return (int) \min(
                $find($str, ' ', $from),
                $find($str, '.', $from)
            );
        };

        // --- no search term: cut the head of the text at a word/sentence boundary
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $string_length = (int) $len($str);
                $end = \min($length - 1, $string_length);
            }

            $pos = $next_boundary($end);
            if (!$pos) {
                return $str;
            }

            $str_sub = $cut($str, 0, $pos);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // --- search term given: try to place the match in the middle of the extract
        $word_position = (int) $find_ci($str, $search);
        $half_side = (int) ($word_position - $length / 2 + (int) $len($search) / 2);

        // Start at the last ' ' or '.' found inside the text preceding the window.
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $cut($str, 0, $half_side);
            if ($half_text !== false) {
                $pos_start = (int) \max(
                    $find_last($half_text, ' '),
                    $find_last($half_text, '.')
                );
            }
        }

        // Upper bound for the search offsets used below.
        $total_length = (int) self::strlen($str, $encoding);

        if ($word_position && $half_side > 0) {
            $offset = \min($pos_start + $length - 1, $total_length);
            $pos_end = $next_boundary($offset) - $pos_start;

            if ($pos_end <= 0) {
                // No usable end boundary: take everything from $pos_start onwards.
                $str_sub = $cut($str, $pos_start, (int) $len($str));

                return $str_sub !== false
                    ? $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars)
                    : '';
            }

            $str_sub = $cut($str, $pos_start, $pos_end);

            return $str_sub !== false
                ? $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text
                : '';
        }

        // Match missing or at the very beginning: extract from the start of the text.
        $offset = \min($length - 1, $total_length);
        $pos_end = $next_boundary($offset);

        if (!$pos_end) {
            return $str;
        }

        $str_sub = $cut($str, 0, $pos_end);

        return $str_sub !== false
            ? \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text
            : '';
    }
1740
1741
    /**
1742
     * Reads entire file into a string.
1743
     *
1744
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
1745
     *
1746
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
1747
     *
1748
     * @see http://php.net/manual/en/function.file-get-contents.php
1749
     *
1750
     * @param string        $filename         <p>
1751
     *                                        Name of the file to read.
1752
     *                                        </p>
1753
     * @param bool          $use_include_path [optional] <p>
1754
     *                                        Prior to PHP 5, this parameter is called
1755
     *                                        use_include_path and is a bool.
1756
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1757
     *                                        to trigger include path
1758
     *                                        search.
1759
     *                                        </p>
1760
     * @param resource|null $context          [optional] <p>
1761
     *                                        A valid context resource created with
1762
     *                                        stream_context_create. If you don't need to use a
1763
     *                                        custom context, you can skip this parameter by &null;.
1764
     *                                        </p>
1765
     * @param int|null      $offset           [optional] <p>
1766
     *                                        The offset where the reading starts.
1767
     *                                        </p>
1768
     * @param int|null      $max_length       [optional] <p>
1769
     *                                        Maximum length of data read. The default is to read until end
1770
     *                                        of file is reached.
1771
     *                                        </p>
1772
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1773
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1774
     *                                        some files, because they used non default utf-8 chars. Binary files
1775
     *                                        like images or pdf will not be converted.</p>
1776
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1777
     *                                        An empty string will trigger the autodetect anyway.</p>
1778
     *
1779
     * @psalm-pure
1780
     *
1781
     * @return false|string
1782
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
1783
     */
1784 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // The filename is run through the sanitize polyfill before it is used.
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
        $filename = Bootup::filter_sanitize_string_polyfill($filename);
        if ($filename === false) {
            return false;
        }

        // Only build a context when the caller supplied none and a timeout is set;
        // the timeout is registered under the 'http' wrapper options.
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        // A missing offset means "start at 0"; the length argument is only
        // passed when an integer was given, with negatives clamped to 0.
        $read_args = [$filename, $use_include_path, $context, $offset ?? 0];
        if (\is_int($max_length)) {
            $read_args[] = \max(0, $max_length);
        }

        $data = \file_get_contents(...$read_args);

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // Convert when the data is not detected as binary, or when it is
        // detected as UTF-16 / UTF-32.
        $should_convert = !self::is_binary($data, true)
            || self::is_utf16($data, false) !== false
            || self::is_utf32($data, false) !== false;

        if ($should_convert) {
            $data = self::encode('UTF-8', $data, false, $from_encoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1845
1846
    /**
1847
     * Checks if a file starts with BOM (Byte Order Mark) character.
1848
     *
1849
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
1850
     *
1851
     * @param string $file_path <p>Path to a valid file.</p>
1852
     *
1853
     * @throws \RuntimeException if file_get_contents() returned false
1854
     *
1855
     * @return bool
1856
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
1857
     *
1858
     * @psalm-pure
1859
     */
1860 2
    public static function file_has_bom(string $file_path): bool
    {
        // The whole file is read; the BOM check itself is delegated to string_has_bom().
        $raw_bytes = \file_get_contents($file_path);

        if ($raw_bytes !== false) {
            return self::string_has_bom($raw_bytes);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1869
1870
    /**
1871
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1872
     *
1873
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
1874
     *
1875
     * @param array|object|string $var
1876
     * @param int                 $normalization_form
1877
     * @param string              $leading_combining
1878
     *
1879
     * @psalm-pure
1880
     *
1881
     * @return mixed
1882
     *
1883
     * @template TFilter
1884
     * @phpstan-param TFilter $var
1885
     * @phpstan-return TFilter
1886
     */
1887 64
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            // Containers: filter every element / property in place, recursively.
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            // Drop the dangling reference left behind by the by-reference loop.
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // Pure ASCII strings need no further work.
            if (!ASCII::is_ascii($var)) {
                // '-' is only a truthy marker meaning "the input was already normalized".
                $normalized = '-';

                if (!\Normalizer::isNormalized($var, $normalization_form)) {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // Use the normalized form when there is one, otherwise
                    // fall back to re-encoding the input as UTF-8.
                    $var = ($normalized && isset($normalized[0]))
                        ? $normalized
                        : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $starts_with_combining_mark = $normalized
                    && $var[0] >= "\x80"
                    && isset($normalized[0], $leading_combining[0])
                    && \preg_match('/^\\p{Mn}/u', $var);

                if ($starts_with_combining_mark) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }
        // Every other type is returned untouched.

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1947
1948
    /**
1949
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1950
     *
1951
     * Gets a specific external variable by name and optionally filters it.
1952
     *
1953
     * EXAMPLE: <code>
1954
     * // _GET['foo'] = 'bar';
1955
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW); // 'bar'
1956
     * </code>
1957
     *
1958
     * @see http://php.net/manual/en/function.filter-input.php
1959
     *
1960
     * @param int            $type          <p>
1961
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1962
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1963
     *                                      <b>INPUT_ENV</b>.
1964
     *                                      </p>
1965
     * @param string         $variable_name <p>
1966
     *                                      Name of a variable to get.
1967
     *                                      </p>
1968
     * @param int            $filter        [optional] <p>
1969
     *                                      The ID of the filter to apply. The
1970
     *                                      manual page lists the available filters.
1971
     *                                      </p>
1972
     * @param int|int[]|null $options       [optional] <p>
1973
     *                                      Associative array of options or bitwise disjunction of flags. If filter
1974
     *                                      accepts options, flags can be provided in "flags" field of array.
1975
     *                                      </p>
1976
     *
1977
     * @psalm-pure
1978
     *
1979
     * @return mixed
1980
     *               <p>
1981
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1982
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1983
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1984
     *               </p>
1985
     */
1986 1
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_options = $options !== null && \func_num_args() >= 4;

        // Forward $options to the native function only when the caller really supplied them.
        $var = $has_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        // The native result is passed through filter() before being returned.
        return self::filter($var);
    }
2003
2004
    /**
2005
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2006
     *
2007
     * Gets external variables and optionally filters them.
2008
     *
2009
     * EXAMPLE: <code>
2010
     * // _GET['foo'] = 'bar';
2011
     * UTF8::filter_input_array(INPUT_GET, array('foo' => 'FILTER_UNSAFE_RAW')); // array('bar')
2012
     * </code>
2013
     *
2014
     * @see http://php.net/manual/en/function.filter-input-array.php
2015
     *
2016
     * @param int        $type       <p>
2017
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
2018
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
2019
     *                               <b>INPUT_ENV</b>.
2020
     *                               </p>
2021
     * @param array|null $definition [optional] <p>
2022
     *                               An array defining the arguments. A valid key is a string
2023
     *                               containing a variable name and a valid value is either a filter type, or an array
2024
     *                               optionally specifying the filter, flags and options. If the value is an
2025
     *                               array, valid keys are filter which specifies the
2026
     *                               filter type,
2027
     *                               flags which specifies any flags that apply to the
2028
     *                               filter, and options which specifies any options that
2029
     *                               apply to the filter. See the example below for a better understanding.
2030
     *                               </p>
2031
     *                               <p>
2032
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
2033
     *                               input array are filtered by this filter.
2034
     *                               </p>
2035
     * @param bool       $add_empty  [optional] <p>
2036
     *                               Add missing keys as <b>NULL</b> to the return value.
2037
     *                               </p>
2038
     *
2039
     * @psalm-pure
2040
     *
2041
     * @return mixed
2042
     *               <p>
2043
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2044
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2045
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
2046
     *               is not set and <b>NULL</b> if the filter fails.
2047
     *               </p>
2048
     */
2049 1
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_definition = $definition !== null && \func_num_args() >= 2;

        // Without a definition the native function is called with the input type only.
        $values = $has_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        // The native result is passed through filter() before being returned.
        return self::filter($values);
    }
2065
2066
    /**
2067
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2068
     *
2069
     * Filters a variable with a specified filter.
2070
     *
2071
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
2072
     *
2073
     * @see http://php.net/manual/en/function.filter-var.php
2074
     *
2075
     * @param float|int|string|null $variable <p>
2076
     *                                        Value to filter.
2077
     *                                        </p>
2078
     * @param int                   $filter   [optional] <p>
2079
     *                                        The ID of the filter to apply. The
2080
     *                                        manual page lists the available filters.
2081
     *                                        </p>
2082
     * @param int|int[]|null        $options  [optional] <p>
2083
     *                                        Associative array of options or bitwise disjunction of flags. If filter
2084
     *                                        accepts options, flags can be provided in "flags" field of array. For
2085
     *                                        the "callback" filter, callable type should be passed. The
2086
     *                                        callback must accept one argument, the value to be filtered, and return
2087
     *                                        the value after filtering/sanitizing it.
2088
     *                                        </p>
2089
     *                                        <p>
2090
     *                                        <code>
2091
     *                                        // for filters that accept options, use this format
2092
     *                                        $options = array(
2093
     *                                        'options' => array(
2094
     *                                        'default' => 3, // value to return if the filter fails
2095
     *                                        // other options here
2096
     *                                        'min_range' => 0
2097
     *                                        ),
2098
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
2099
     *                                        );
2100
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
2101
     *                                        // for filter that only accept flags, you can pass them directly
2102
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
2103
     *                                        // for filter that only accept flags, you can also pass as an array
2104
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
2105
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
2106
     *                                        // callback validate filter
2107
     *                                        function foo($value)
2108
     *                                        {
2109
     *                                        // Expected format: Surname, GivenNames
2110
     *                                        if (strpos($value, ", ") === false) return false;
2111
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
2112
     *                                        $empty = (empty($surname) || empty($givennames));
2113
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
2114
     *                                        if ($empty || $notstrings) {
2115
     *                                        return false;
2116
     *                                        } else {
2117
     *                                        return $value;
2118
     *                                        }
2119
     *                                        }
2120
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
2121
     *                                        </code>
2122
     *                                        </p>
2123
     *
2124
     * @psalm-pure
2125
     *
2126
     * @return mixed
2127
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
2128
     */
2129 2
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // The native filter_var() only accepts an array or an int as its third
        // argument. An explicit null (which this wrapper documents as allowed)
        // must therefore not be forwarded; it is treated like "no options given",
        // the same way filter_input() in this class handles it.
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        // The native result is passed through filter() before being returned.
        return self::filter($variable);
    }
2145
2146
    /**
2147
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2148
     *
2149
     * Gets multiple variables and optionally filters them.
2150
     *
2151
     * EXAMPLE: <code>
2152
     * $filters = [
2153
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
2154
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
2155
     *     'email' => FILTER_VALIDATE_EMAIL,
2156
     * ];
2157
     *
2158
     * $data = [
2159
     *     'name' => 'κόσμε',
2160
     *     'age' => '18',
2161
     *     'email' => 'foo@bar.de'
2162
     * ];
2163
     *
2164
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'foo@bar.de']
2165
     * </code>
2166
     *
2167
     * @see http://php.net/manual/en/function.filter-var-array.php
2168
     *
2169
     * @param array<mixed>   $data       <p>
2170
     *                                   An array with string keys containing the data to filter.
2171
     *                                   </p>
2172
     * @param array|int|null $definition [optional] <p>
2173
     *                                   An array defining the arguments. A valid key is a string
2174
     *                                   containing a variable name and a valid value is either a
2175
     *                                   filter type, or an
2176
     *                                   array optionally specifying the filter, flags and options.
2177
     *                                   If the value is an array, valid keys are filter
2178
     *                                   which specifies the filter type,
2179
     *                                   flags which specifies any flags that apply to the
2180
     *                                   filter, and options which specifies any options that
2181
     *                                   apply to the filter. See the example below for a better understanding.
2182
     *                                   </p>
2183
     *                                   <p>
2184
     *                                   This parameter can be also an integer holding a filter constant. Then all values
2185
     *                                   in the input array are filtered by this filter.
2186
     *                                   </p>
2187
     * @param bool           $add_empty  [optional] <p>
2188
     *                                   Add missing keys as <b>NULL</b> to the return value.
2189
     *                                   </p>
2190
     *
2191
     * @psalm-pure
2192
     *
2193
     * @return mixed
2194
     *               <p>
2195
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2196
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2197
     *               set.
2198
     *               </p>
2199
     */
2200 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // The native filter_var_array() only accepts an array or an int as its
        // second argument. An explicit null definition (documented as allowed
        // for this wrapper) is therefore treated like "no definition given",
        // the same way filter_input_array() in this class handles it.
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition === null || \func_num_args() < 2) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        // The native result is passed through filter() before being returned.
        return self::filter($a);
    }
2216
2217
    /**
     * Tells whether the "finfo" class can be used on this server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the "finfo" class exists, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function finfo_loaded(): bool
    {
        $has_finfo = \class_exists('finfo');

        return $has_finfo;
    }
2231
2232
    /**
     * Get the leading $n characters of a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the start of the string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The first $n characters, or an empty string if $str is empty or $n is not positive.</p>
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to extract from an empty string or for a non-positive length
        if ($n <= 0 || $str === '') {
            return '';
        }

        // fast path: plain "mb_substr" for UTF-8, the encoding-aware helper otherwise
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2258
2259
    /**
     * Check that the number of characters in the string does not exceed the given size.
     *
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // true (5 characters)</code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the maximum number of chars the string may have
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if the string length is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $char_count = (int) self::strlen($str);

        return $box_size >= $char_count;
    }
2276
2277
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement table is loaded once via "getData('utf8_fix')" and then kept in
     * static caches for all following calls.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input with every known broken sequence replaced by its fixed counterpart.</p>
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = null;

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = null;

        if ($BROKEN_UTF8_TO_UTF8_KEYS_CACHE === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            // Derive both caches from the same guarded table, so the search list and the
            // replacement list always line up - even if the loaded table is empty / falsy.
            $fix_table = self::$BROKEN_UTF8_FIX ?: [];

            $BROKEN_UTF8_TO_UTF8_KEYS_CACHE = \array_keys($fix_table);
            $BROKEN_UTF8_TO_UTF8_VALUES_CACHE = \array_values($fix_table);
        }

        \assert(\is_array($BROKEN_UTF8_TO_UTF8_VALUES_CACHE));

        return \str_replace($BROKEN_UTF8_TO_UTF8_KEYS_CACHE, $BROKEN_UTF8_TO_UTF8_VALUES_CACHE, $str);
    }
2327
2328
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * The string is decoded and re-encoded again and again until the result
     * does not change anymore. Arrays are processed element by element (recursively).
     *
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>Will return the fixed input-"array" or
     *                         the fixed input-"string".</p>
     *
     * @template TFixUtf8
     * @phpstan-param TFixUtf8 $str
     * @phpstan-return TFixUtf8
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // fix every element in place, the keys are kept as they are
            foreach ($str as $key => $item) {
                $str[$key] = self::fix_utf8($item);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // repeat the decode / encode round-trip until a pass changes nothing
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2376
2377
    /**
     * Get the text direction of a specific character.
     *
     * If "IntlChar" support is flagged as available, the direction code reported by
     * "IntlChar::charDirection()" is used first; when that code is in neither of the
     * two known lists, or without "IntlChar" support, the decision falls back to a
     * built-in table of right-to-left code point ranges.
     *
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * @param string $char
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            $direction_code = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            if (\in_array($direction_code, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($direction_code, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }
        }

        $c = static::chr_to_decimal($char);

        // everything outside of the overall table bounds is left-to-right
        if ($c < 0x5be || $c > 0x10b7f) {
            return 'LTR';
        }

        // inclusive [from, to] code point ranges of right-to-left characters
        $rtl_ranges = [
            // 0x5be - 0x85e
            [0x5be, 0x5be],
            [0x5c0, 0x5c0],
            [0x5c3, 0x5c3],
            [0x5c6, 0x5c6],
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x608, 0x608],
            [0x60b, 0x60b],
            [0x60d, 0x60d],
            [0x61b, 0x61b],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x710, 0x710],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7b1, 0x7b1],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x7fa, 0x7fa],
            [0x800, 0x815],
            [0x81a, 0x81a],
            [0x824, 0x824],
            [0x828, 0x828],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0x85e, 0x85e],
            // single code point in between
            [0x200f, 0x200f],
            // 0xfb1d - 0x10b7f
            [0xfb1d, 0xfb1d],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb3e, 0xfb3e],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x10808, 0x10808],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083c, 0x1083c],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x1093f, 0x1093f],
            [0x10a00, 0x10a00],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            // upper end is capped by the bounds check above
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2494
2495
    /**
     * Check for php-support.
     *
     * @param string|null $key
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return the stored value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // lazy-load the transliterator list on first keyed access
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        return self::$SUPPORT;
    }
2521
2522
    /**
     * Detect the file type from the first two bytes of the given content.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The (binary) content to inspect.</p>
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with this keys: 'ext', 'mime', 'type'; the $fallback is returned
     *                         when the content is shorter than two bytes or the type is unknown</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // at least two bytes are needed for the signature check
        if (!isset($str[1])) {
            return $fallback;
        }

        // decimal values of the first two bytes, glued together (e.g. 0x89 0x50 => "137" . "80")
        $type_code = (int) (\ord($str[0]) . \ord($str[1]));

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_signatures = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_signatures[$type_code] ?? $fallback;
    }
2598
2599
    /**
     * Build a random string out of the given characters.
     *
     * "random_int()" is used for the selection, with "mt_rand()" as fallback
     * if "random_int()" throws an exception.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The random string, or an empty string if $possible_chars contains no characters.</p>
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // the plain "mb_*" functions are only used if "UTF-8" was requested literally
        $use_mb_directly = $encoding === 'UTF-8';

        if ($use_mb_directly) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        if ($pool_size === 0) {
            return '';
        }

        $result = '';
        $picked = 0;

        //
        // add random chars
        //
        while ($picked < $length) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                $index = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_mb_directly
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // only successful extractions count towards the requested length
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2661
2662
    /**
     * Create a unique string based on a random number, the session-id,
     * the remote / server address (if available) and the optional extra entropy.
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_part = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            $random_part = \mt_rand(0, \mt_getrandmax());
        }

        // all entropy sources glued together, used as prefix for "uniqid()"
        $seed = \implode(
            '',
            [
                $random_part,
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $extra_entropy,
            ]
        );

        $unique = \uniqid($seed, true);

        return $use_md5 ? \md5($unique . $seed) : $unique;
    }
2690
2691
    /**
     * Tells whether the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without mbstring support the internal pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2709
2710
    /**
     * Tells whether the string contains at least one whitespace character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        $pattern = '.*[[:space:]]';

        // without mbstring support the internal pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2728
2729
    /**
     * Tells whether the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without mbstring support the internal pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2747
2748
    /**
     * Converts a hexadecimal value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2766
2767
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * Accepted are 4 to 6 hexadecimal digits, optionally prefixed with "\u" or "U+".
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure.</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hexadecimal digits are allowed here: with a wider character class
        // an input like "zzzz" would pass the check and be converted to 0 by "intval()"
        // instead of being reported as a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2796
2797
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * With mbstring support "mb_encode_numericentity()" does the work, otherwise
     * every character is encoded on its own via "single_chr_html_encode()".
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // start at 0x80 if the ASCII range should be left untouched
            $convmap = [$keep_ascii_chars ? 0x80 : 0x00, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
                $encoded = \mb_encode_numericentity($str, $convmap);
                if ($encoded !== null && $encoded !== false) {
                    return $encoded;
                }
            }

            // other encodings, or second try for UTF-8 with the encoding passed explicitly
            /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
            $encoded = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($encoded !== null && $encoded !== false) {
                return $encoded;
            }
        }

        //
        // fallback via vanilla php
        //

        $encode_single_char = static function (string $chr) use ($keep_ascii_chars, $encoding): string {
            return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
        };

        return \implode('', \array_map($encode_single_char, self::str_split($str)));
    }
2869
2870
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so numeric entities without a semicolon get one appended
     * before the native function is called.
     *
     * Convert all HTML entities to their applicable characters. The decoding is
     * repeated until the string does not change anymore.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string   $str      <p>
     *                           The input string.
     *                           </p>
     * @param int|null $flags    [optional] <p>
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
     *                           and which document type to use. If NULL is given, ENT_QUOTES | ENT_HTML5 is used.
     *                           <table>
     *                           Available <i>flags</i> constants
     *                           <tr valign="top">
     *                           <td>Constant Name</td>
     *                           <td>Description</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_COMPAT</b></td>
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_QUOTES</b></td>
     *                           <td>Will convert both double and single quotes.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_NOQUOTES</b></td>
     *                           <td>Will leave both double and single quotes unconverted.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_HTML401</b></td>
     *                           <td>Handle code as HTML 4.01.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_XML1</b></td>
     *                           <td>Handle code as XML 1.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_XHTML</b></td>
     *                           <td>Handle code as XHTML.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_HTML5</b></td>
     *                           <td>Handle code as HTML 5.</td>
     *                           </tr>
     *                           </table>
     *                           </p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // too short for an entity (examples: &; || &x;) or no "&" at all
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        do {
            $before_pass = $str;

            // no "&" left: nothing more to decode
            if (\strpos($str, '&') === false) {
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($before_pass !== $str);

        return $str;
    }
3002
3003
    /**
     * Create an escaped html version of the string via "UTF8::htmlspecialchars()",
     * using the flags "ENT_QUOTES | ENT_SUBSTITUTE".
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3021
3022
    /**
3023
     * Remove empty html-tag.
3024
     *
3025
     * e.g.: <pre><tag></tag></pre>
3026
     *
3027
     * @param string $str
3028
     *
3029
     * @psalm-pure
3030
     *
3031
     * @return string
3032
     */
3033 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // Pattern: an opening tag, optional whitespace, then any closing tag,
        // e.g. "<tag></tag>" or "<p>  </p>".
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // preg_replace() returns null when PCRE fails (e.g. the subject is not
        // valid UTF-8 while the "u" modifier is set). Casting that null to string
        // would silently wipe the whole input, so return the input untouched instead.
        return $stripped ?? $str;
    }
3041
3042
    /**
3043
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3044
     *
3045
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3046
     *
3047
     * @see http://php.net/manual/en/function.htmlentities.php
3048
     *
3049
     * @param string $str           <p>
3050
     *                              The input string.
3051
     *                              </p>
3052
     * @param int    $flags         [optional] <p>
3053
     *                              A bitmask of one or more of the following flags, which specify how to handle
3054
     *                              quotes, invalid code unit sequences and the used document type. The default is
3055
     *                              ENT_COMPAT | ENT_HTML401.
3056
     *                              <table>
3057
     *                              Available <i>flags</i> constants
3058
     *                              <tr valign="top">
3059
     *                              <td>Constant Name</td>
3060
     *                              <td>Description</td>
3061
     *                              </tr>
3062
     *                              <tr valign="top">
3063
     *                              <td><b>ENT_COMPAT</b></td>
3064
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3065
     *                              </tr>
3066
     *                              <tr valign="top">
3067
     *                              <td><b>ENT_QUOTES</b></td>
3068
     *                              <td>Will convert both double and single quotes.</td>
3069
     *                              </tr>
3070
     *                              <tr valign="top">
3071
     *                              <td><b>ENT_NOQUOTES</b></td>
3072
     *                              <td>Will leave both double and single quotes unconverted.</td>
3073
     *                              </tr>
3074
     *                              <tr valign="top">
3075
     *                              <td><b>ENT_IGNORE</b></td>
3076
     *                              <td>
3077
     *                              Silently discard invalid code unit sequences instead of returning
3078
     *                              an empty string. Using this flag is discouraged as it
3079
     *                              may have security implications.
3080
     *                              </td>
3081
     *                              </tr>
3082
     *                              <tr valign="top">
3083
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3084
     *                              <td>
3085
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3086
     *                              U+FFFD (UTF-8) or &amp;#xFFFD; (otherwise) instead of returning an empty
3087
     *                              string.
3088
     *                              </td>
3089
     *                              </tr>
3090
     *                              <tr valign="top">
3091
     *                              <td><b>ENT_DISALLOWED</b></td>
3092
     *                              <td>
3093
     *                              Replace invalid code points for the given document type with a
3094
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &amp;#xFFFD;
3095
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3096
     *                              instance, to ensure the well-formedness of XML documents with
3097
     *                              embedded external content.
3098
     *                              </td>
3099
     *                              </tr>
3100
     *                              <tr valign="top">
3101
     *                              <td><b>ENT_HTML401</b></td>
3102
     *                              <td>
3103
     *                              Handle code as HTML 4.01.
3104
     *                              </td>
3105
     *                              </tr>
3106
     *                              <tr valign="top">
3107
     *                              <td><b>ENT_XML1</b></td>
3108
     *                              <td>
3109
     *                              Handle code as XML 1.
3110
     *                              </td>
3111
     *                              </tr>
3112
     *                              <tr valign="top">
3113
     *                              <td><b>ENT_XHTML</b></td>
3114
     *                              <td>
3115
     *                              Handle code as XHTML.
3116
     *                              </td>
3117
     *                              </tr>
3118
     *                              <tr valign="top">
3119
     *                              <td><b>ENT_HTML5</b></td>
3120
     *                              <td>
3121
     *                              Handle code as HTML 5.
3122
     *                              </td>
3123
     *                              </tr>
3124
     *                              </table>
3125
     *                              </p>
3126
     * @param string $encoding      [optional] <p>
3127
     *                              Like <b>htmlspecialchars</b>,
3128
     *                              <b>htmlentities</b> takes an optional third argument
3129
     *                              <i>encoding</i> which defines encoding used in
3130
     *                              conversion.
3131
     *                              Although this argument is technically optional, you are highly
3132
     *                              encouraged to specify the correct value for your code.
3133
     *                              </p>
3134
     * @param bool   $double_encode [optional] <p>
3135
     *                              When <i>double_encode</i> is turned off PHP will not
3136
     *                              encode existing html entities. The default is to convert everything.
3137
     *                              </p>
3138
     *
3139
     * @psalm-pure
3140
     *
3141
     * @return string
3142
     *                <p>
3143
     *                The encoded string.
3144
     *                <br><br>
3145
     *                If the input <i>string</i> contains an invalid code unit
3146
     *                sequence within the given <i>encoding</i> an empty string
3147
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3148
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3149
     *                </p>
3150
     */
3151 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // 'UTF-8' and 'CP850' are passed through as-is; every other name is
        // normalized first (with 'UTF-8' as the fallback argument).
        $is_passthrough_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_passthrough_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        // The native htmlentities() leaves backslashes alone, so they are converted
        // to their numeric entity here.
        //
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // NOTE(review): the second argument (true) is forwarded unchanged from the
        // original code; its meaning is defined by html_encode(), not visible here.
        return self::html_encode($encoded, true, $encoding);
    }
3180
3181
    /**
3182
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3183
     *
3184
     * INFO: Take a look at "UTF8::htmlentities()"
3185
     *
3186
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3187
     *
3188
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3189
     *
3190
     * @param string $str           <p>
3191
     *                              The string being converted.
3192
     *                              </p>
3193
     * @param int    $flags         [optional] <p>
3194
     *                              A bitmask of one or more of the following flags, which specify how to handle
3195
     *                              quotes, invalid code unit sequences and the used document type. The default is
3196
     *                              ENT_COMPAT | ENT_HTML401.
3197
     *                              <table>
3198
     *                              Available <i>flags</i> constants
3199
     *                              <tr valign="top">
3200
     *                              <td>Constant Name</td>
3201
     *                              <td>Description</td>
3202
     *                              </tr>
3203
     *                              <tr valign="top">
3204
     *                              <td><b>ENT_COMPAT</b></td>
3205
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3206
     *                              </tr>
3207
     *                              <tr valign="top">
3208
     *                              <td><b>ENT_QUOTES</b></td>
3209
     *                              <td>Will convert both double and single quotes.</td>
3210
     *                              </tr>
3211
     *                              <tr valign="top">
3212
     *                              <td><b>ENT_NOQUOTES</b></td>
3213
     *                              <td>Will leave both double and single quotes unconverted.</td>
3214
     *                              </tr>
3215
     *                              <tr valign="top">
3216
     *                              <td><b>ENT_IGNORE</b></td>
3217
     *                              <td>
3218
     *                              Silently discard invalid code unit sequences instead of returning
3219
     *                              an empty string. Using this flag is discouraged as it
3220
     *                              may have security implications.
3221
     *                              </td>
3222
     *                              </tr>
3223
     *                              <tr valign="top">
3224
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3225
     *                              <td>
3226
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3227
     *                              U+FFFD (UTF-8) or &amp;#xFFFD; (otherwise) instead of returning an empty
3228
     *                              string.
3229
     *                              </td>
3230
     *                              </tr>
3231
     *                              <tr valign="top">
3232
     *                              <td><b>ENT_DISALLOWED</b></td>
3233
     *                              <td>
3234
     *                              Replace invalid code points for the given document type with a
3235
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &amp;#xFFFD;
3236
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3237
     *                              instance, to ensure the well-formedness of XML documents with
3238
     *                              embedded external content.
3239
     *                              </td>
3240
     *                              </tr>
3241
     *                              <tr valign="top">
3242
     *                              <td><b>ENT_HTML401</b></td>
3243
     *                              <td>
3244
     *                              Handle code as HTML 4.01.
3245
     *                              </td>
3246
     *                              </tr>
3247
     *                              <tr valign="top">
3248
     *                              <td><b>ENT_XML1</b></td>
3249
     *                              <td>
3250
     *                              Handle code as XML 1.
3251
     *                              </td>
3252
     *                              </tr>
3253
     *                              <tr valign="top">
3254
     *                              <td><b>ENT_XHTML</b></td>
3255
     *                              <td>
3256
     *                              Handle code as XHTML.
3257
     *                              </td>
3258
     *                              </tr>
3259
     *                              <tr valign="top">
3260
     *                              <td><b>ENT_HTML5</b></td>
3261
     *                              <td>
3262
     *                              Handle code as HTML 5.
3263
     *                              </td>
3264
     *                              </tr>
3265
     *                              </table>
3266
     *                              </p>
3267
     * @param string $encoding      [optional] <p>
3268
     *                              Defines encoding used in conversion.
3269
     *                              </p>
3270
     *                              <p>
3271
     *                              For the purposes of this function, the encodings
3272
     *                              ISO-8859-1, ISO-8859-15,
3273
     *                              UTF-8, cp866,
3274
     *                              cp1251, cp1252, and
3275
     *                              KOI8-R are effectively equivalent, provided the
3276
     *                              <i>string</i> itself is valid for the encoding, as
3277
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3278
     *                              the same positions in all of these encodings.
3279
     *                              </p>
3280
     * @param bool   $double_encode [optional] <p>
3281
     *                              When <i>double_encode</i> is turned off PHP will not
3282
     *                              encode existing html entities, the default is to convert everything.
3283
     *                              </p>
3284
     *
3285
     * @psalm-pure
3286
     *
3287
     * @return string the converted string.
3288
     *                </p>
3289
     *                <p>
3290
     *                If the input <i>string</i> contains an invalid code unit
3291
     *                sequence within the given <i>encoding</i> an empty string
3292
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3293
     *                <b>ENT_SUBSTITUTE</b> flags are set
3294
     */
3295 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // 'UTF-8' and 'CP850' are used verbatim; any other encoding name is
        // normalized before it is handed to the native function.
        $is_passthrough_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_passthrough_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
3312
3313
    /**
3314
     * Checks whether iconv is available on the server.
3315
     *
3316
     * @psalm-pure
3317
     *
3318
     * @return bool
3319
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3320
     *
3321
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3322
     */
3323
    public static function iconv_loaded(): bool
    {
        // True when the "iconv" extension is loaded in this PHP runtime.
        $has_iconv = \extension_loaded('iconv');

        return $has_iconv;
    }
3327
3328
    /**
3329
     * Converts Integer to hexadecimal U+xxxx code point representation.
3330
     *
3331
     * INFO: opposite to UTF8::hex_to_int()
3332
     *
3333
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3334
     *
3335
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3336
     * @param string $prefix [optional]
3337
     *
3338
     * @psalm-pure
3339
     *
3340
     * @return string the code point, or empty string on failure
3341
     */
3342 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Left-pad with zeros to at least four hex digits; values that already
        // need four or more digits pass through unchanged.
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $digits;
    }
3350
3351
    /**
3352
     * Checks whether intl-char is available on the server.
3353
     *
3354
     * @psalm-pure
3355
     *
3356
     * @return bool
3357
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3358
     *
3359
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3360
     */
3361
    public static function intlChar_loaded(): bool
    {
        // True when the IntlChar class can be resolved in this PHP runtime.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
3365
3366
    /**
3367
     * Checks whether intl is available on the server.
3368
     *
3369
     * @psalm-pure
3370
     *
3371
     * @return bool
3372
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3373
     *
3374
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3375
     */
3376 5
    public static function intl_loaded(): bool
    {
        // True when the "intl" extension is loaded in this PHP runtime.
        $has_intl = \extension_loaded('intl');

        return $has_intl;
    }
3380
3381
    /**
3382
     * Returns true if the string contains only alphabetic chars, false otherwise.
3383
     *
3384
     * @param string $str <p>The input string.</p>
3385
     *
3386
     * @psalm-pure
3387
     *
3388
     * @return bool
3389
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3390
     */
3391 10
    public static function is_alpha(string $str): bool
    {
        // Zero or more alphabetic characters and nothing else (so '' matches too).
        $only_alpha = '^[[:alpha:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $only_alpha);
        }

        return \mb_ereg_match($only_alpha, $str);
    }
3399
3400
    /**
3401
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3402
     *
3403
     * @param string $str <p>The input string.</p>
3404
     *
3405
     * @psalm-pure
3406
     *
3407
     * @return bool
3408
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3409
     */
3410 13
    public static function is_alphanumeric(string $str): bool
    {
        // Zero or more alphanumeric characters and nothing else (so '' matches too).
        $only_alnum = '^[[:alnum:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $only_alnum);
        }

        return \mb_ereg_match($only_alnum, $str);
    }
3418
3419
    /**
3420
     * Returns true if the string contains only punctuation chars, false otherwise.
3421
     *
3422
     * @param string $str <p>The input string.</p>
3423
     *
3424
     * @psalm-pure
3425
     *
3426
     * @return bool
3427
     *              <p>Whether or not $str contains only punctuation chars.</p>
3428
     */
3429 10
    public static function is_punctuation(string $str): bool
    {
        // Zero or more punctuation characters and nothing else.
        $only_punctuation = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $only_punctuation);
    }
3433
3434
    /**
3435
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3436
     *
3437
     * @param string $str                       <p>The input string.</p>
3438
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
3439
     *
3440
     * @psalm-pure
3441
     *
3442
     * @return bool
3443
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3444
     */
3445 1
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // The string counts as printable when removing invisible characters leaves
        // it unchanged.
        // NOTE(review): the `false` and `''` arguments are forwarded exactly as in
        // the original call; their meaning is defined by remove_invisible_characters().
        $without_invisible = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $without_invisible === $str;
    }
3449
3450
    /**
3451
     * Checks if a string is 7 bit ASCII.
3452
     *
3453
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3454
     *
3455
     * @param string $str <p>The string to check.</p>
3456
     *
3457
     * @psalm-pure
3458
     *
3459
     * @return bool
3460
     *              <p>
3461
     *              <strong>true</strong> if it is ASCII<br>
3462
     *              <strong>false</strong> otherwise
3463
     *              </p>
3464
     */
3465 8
    public static function is_ascii(string $str): bool
    {
        // Pure delegation: the actual check lives in the ASCII helper class.
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3469
3470
    /**
3471
     * Returns true if the string is base64 encoded, false otherwise.
3472
     *
3473
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3474
     *
3475
     * @param string|null $str                   <p>The input string.</p>
3476
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3477
     *
3478
     * @psalm-pure
3479
     *
3480
     * @return bool
3481
     *              <p>Whether or not $str is base64 encoded.</p>
3482
     */
3483 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // Anything that is not a string (e.g. null) is never base64.
        if (\is_string($str) === false) {
            return false;
        }

        // The empty string is only accepted when the caller opted in.
        if ($str === '' && $empty_string_is_valid === false) {
            return false;
        }

        // Strict mode makes base64_decode() return false for characters outside
        // the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round-trip check: the input must be exactly what re-encoding produces.
        return \base64_encode($decoded) === $str;
    }
3501
3502
    /**
3503
     * Check if the input is binary... (this looks like a hack).
3504
     *
3505
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3506
     *
3507
     * @param int|string $input
3508
     * @param bool       $strict
3509
     *
3510
     * @psalm-pure
3511
     *
3512
     * @return bool
3513
     */
3514 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // A string made only of "0" and "1" characters is treated as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        // Ask the file-type sniffer next.
        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        if ($strict === false) {
            // Heuristic: more than 25% NUL bytes means binary.
            $byte_count = \strlen($input);
            $null_count = \substr_count($input, "\x0", 0, $byte_count);

            return ($null_count / $byte_count) > 0.25;
        }

        // Strict mode relies on ext-fileinfo.
        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $detected_encoding === 'binary';
    }
3554
3555
    /**
3556
     * Check if the file is binary.
3557
     *
3558
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3559
     *
3560
     * @param string $file
3561
     *
3562
     * @return bool
3563
     */
3564 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // The file could not be opened, so there is nothing to inspect.
            return false;
        }

        // Only the first 512 bytes are sampled.
        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === false || $head === '') {
            return false;
        }

        // The sample is always checked in strict mode.
        return self::is_binary($head, true);
    }
3581
3582
    /**
3583
     * Returns true if the string contains only whitespace chars, false otherwise.
3584
     *
3585
     * @param string $str <p>The input string.</p>
3586
     *
3587
     * @psalm-pure
3588
     *
3589
     * @return bool
3590
     *              <p>Whether or not $str contains only whitespace characters.</p>
3591
     */
3592 15
    public static function is_blank(string $str): bool
    {
        // Zero or more whitespace characters and nothing else (so '' matches too).
        $only_whitespace = '^[[:space:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $only_whitespace);
        }

        return \mb_ereg_match($only_whitespace, $str);
    }
3600
3601
    /**
3602
     * Checks if the given string is equal to any "Byte Order Mark".
3603
     *
3604
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3605
     *
3606
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3607
     *
3608
     * @param string $str <p>The input string.</p>
3609
     *
3610
     * @psalm-pure
3611
     *
3612
     * @return bool
3613
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3614
     */
3615 2
    public static function is_bom($str): bool
    {
        // Only the keys of self::$BOM (the BOM byte strings) are compared; the
        // values are not needed. Iterate by value: a by-reference foreach would
        // write reference slots into the shared static array for no benefit.
        /** @noinspection PhpUnusedLocalVariableInspection */
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // Strict comparison: the whole input must be exactly one BOM.
            if ($str === $bom_string) {
                return true;
            }
        }

        return false;
    }
3626
3627
    /**
3628
     * Determine whether the string is considered to be empty.
3629
     *
3630
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3631
     * empty() does not generate a warning if the variable does not exist.
3632
     *
3633
     * @param array|float|int|string $str
3634
     *
3635
     * @psalm-pure
3636
     *
3637
     * @return bool
3638
     *              <p>Whether or not $str is empty().</p>
3639
     */
3640 1
    public static function is_empty($str): bool
    {
        // For an already-defined variable, empty() is simply the negated boolean
        // cast: '', '0', 0, 0.0, [], null and false all count as empty.
        return (bool) $str === false;
    }
3644
3645
    /**
3646
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3647
     *
3648
     * @param string $str <p>The input string.</p>
3649
     *
3650
     * @psalm-pure
3651
     *
3652
     * @return bool
3653
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3654
     */
3655 13
    public static function is_hexadecimal(string $str): bool
    {
        // Zero or more hexadecimal digits and nothing else (so '' matches too).
        $only_hex = '^[[:xdigit:]]*$';

        // Without mbstring, fall back to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $only_hex);
        }

        return \mb_ereg_match($only_hex, $str);
    }
3663
3664
    /**
3665
     * Check if the string contains any HTML tags.
3666
     *
3667
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3668
     *
3669
     * @param string $str <p>The input string.</p>
3670
     *
3671
     * @psalm-pure
3672
     *
3673
     * @return bool
3674
     *              <p>Whether or not $str contains html elements.</p>
3675
     */
3676 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // Emoji are encoded before matching (marked upstream as a hack for emoji support).
        $encoded = self::emoji_encode($str);

        // Looks for one opening, closing or self-closing tag, with optional
        // attributes (bare, quoted or unquoted values).
        $tag_found = \preg_match(
            "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u",
            $encoded
        );

        // preg_match() yields 1 on a match, 0 on no match and false on error;
        // only a real match counts as HTML.
        return $tag_found === 1;
    }
3691
3692
    /**
3693
     * Check if $url is a correct URL.
3694
     *
3695
     * @param string $url
3696
     * @param bool   $disallow_localhost
3697
     *
3698
     * @psalm-pure
3699
     *
3700
     * @return bool
3701
     */
3702 1
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            $loopback_prefixes = [
                'http://localhost',
                'https://localhost',
                'http://127.0.0.1',
                'https://127.0.0.1',
                'http://::1',
                'https://::1',
                // In a URL an IPv6 literal must be wrapped in brackets (RFC 3986),
                // so the loopback address actually shows up as "[::1]". Without
                // these entries "http://[::1]/" slipped past the localhost check.
                'http://[::1]',
                'https://[::1]',
            ];
            if (self::str_istarts_with_any($url, $loopback_prefixes)) {
                return false;
            }

            // Reject any URL that contains ".localhost" (e.g. "foo.localhost").
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        // NOTE(review): this pattern is not anchored at the end, so it only verifies
        // that the URL starts with "scheme://label.label"; the trailing "(?:\d+)?"
        // and "(?:\/\.*)?" groups look like they were meant to be a port and a
        // path - confirm before tightening.
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Everything else is left to PHP's own URL validator.
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3743
3744
    /**
3745
     * Try to check if "$str" is a JSON-string.
3746
     *
3747
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
3748
     *
3749
     * @param string $str                                    <p>The input string.</p>
3750
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
3751
     *                                                       results.</p>
3752
     *
3753
     * @return bool
3754
     *              <p>Whether or not the $str is in JSON format.</p>
3755
     */
3756 42
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only legitimate when the input itself spells "null"
        // (compared case-insensitively); otherwise decoding failed.
        $input_is_null_literal = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$input_is_null_literal) {
            return false;
        }

        if ($only_array_or_object_results_are_valid) {
            // Scalars and null are rejected when only containers are wanted.
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        // Final word goes to the JSON extension's error state.
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3783
3784
    /**
3785
     * @param string $str <p>The input string.</p>
3786
     *
3787
     * @psalm-pure
3788
     *
3789
     * @return bool
3790
     *              <p>Whether or not $str contains only lowercase chars.</p>
3791
     */
3792 8
    public static function is_lowercase(string $str): bool
    {
        // The same POSIX-class pattern is used by both code paths.
        $only_lowercase_pattern = '^[[:lower:]]*$';

        // Prefer the mbstring regex engine when the extension is available.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($only_lowercase_pattern, $str)
            : self::str_matches_pattern($str, $only_lowercase_pattern);
    }
3800
3801
    /**
3802
     * Returns true if the string is serialized, false otherwise.
3803
     *
3804
     * @param string $str <p>The input string.</p>
3805
     *
3806
     * @psalm-pure
3807
     *
3808
     * @return bool
3809
     *              <p>Whether or not $str is serialized.</p>
3810
     */
3811 7
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized boolean false. unserialize() returns false for it,
        // which cannot be told apart from its failure value, so accept it up front.
        if ($str === 'b:0;') {
            return true;
        }

        // Only the *format* is being probed here, so no object may ever be instantiated:
        // with "allowed_classes" disabled, serialized objects are decoded into
        // __PHP_Incomplete_Class and no magic methods of real classes are triggered.
        // The silence operator hides the notice unserialize() emits for malformed input.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3823
3824
    /**
3825
     * Returns true if the string contains only upper case chars, false
3826
     * otherwise.
3827
     *
3828
     * @param string $str <p>The input string.</p>
3829
     *
3830
     * @psalm-pure
3831
     *
3832
     * @return bool
3833
     *              <p>Whether or not $str contains only upper case characters.</p>
3834
     */
3835 8
    public static function is_uppercase(string $str): bool
    {
        // The same POSIX-class pattern is used by both code paths.
        $only_uppercase_pattern = '^[[:upper:]]*$';

        // Prefer the mbstring regex engine when the extension is available.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($only_uppercase_pattern, $str)
            : self::str_matches_pattern($str, $only_uppercase_pattern);
    }
3843
3844
    /**
3845
     * Check if the string is UTF-16.
3846
     *
3847
     * EXAMPLE: <code>
3848
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
3849
     * //
3850
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
3851
     * //
3852
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
3853
     * </code>
3854
     *
3855
     * @param string $str                       <p>The input string.</p>
3856
     * @param bool   $check_if_string_is_binary
3857
     *
3858
     * @psalm-pure
3859
     *
3860
     * @return false|int
3861
     *                   <strong>false</strong> if it's not UTF-16,<br>
3862
     *                   <strong>1</strong> for UTF-16LE,<br>
3863
     *                   <strong>2</strong> for UTF-16BE
3864
     */
3865 21
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check: a string carrying a BOM is tested directly
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if ($check_if_string_is_binary && !self::is_binary($str, true)) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score both byte orders with the same round-trip heuristic:
        // decode to UTF-8, encode back, decode again; only a stable round trip is scored.
        $scores = [];
        foreach (['UTF-16LE', 'UTF-16BE'] as $byte_order) {
            $scores[$byte_order] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $byte_order);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $byte_order, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $byte_order);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and shared between both byte orders
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Iterate by value: the result of a function call is a temporary and
            // must not be bound by reference. Only the keys are needed here.
            // NOTE(review): in_array() searches the *values* of $str_chars - confirm that
            // this matches what count_chars() returns.
            foreach (self::count_chars($test3) as $test3char => $unused_count) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$scores[$byte_order];
                }
            }
        }

        $maybe_utf16le = $scores['UTF-16LE'];
        $maybe_utf16be = $scores['UTF-16BE'];

        // A tie (including 0:0) means "cannot tell", hence not UTF-16.
        if ($maybe_utf16be !== $maybe_utf16le) {
            return $maybe_utf16le > $maybe_utf16be ? 1 : 2;
        }

        return false;
    }
3942
3943
    /**
3944
     * Check if the string is UTF-32.
3945
     *
3946
     * EXAMPLE: <code>
3947
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
3948
     * //
3949
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
3950
     * //
3951
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
3952
     * </code>
3953
     *
3954
     * @param string $str                       <p>The input string.</p>
3955
     * @param bool   $check_if_string_is_binary
3956
     *
3957
     * @psalm-pure
3958
     *
3959
     * @return false|int
3960
     *                   <strong>false</strong> if it's not UTF-32,<br>
3961
     *                   <strong>1</strong> for UTF-32LE,<br>
3962
     *                   <strong>2</strong> for UTF-32BE
3963
     */
3964 19
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check: a string carrying a BOM is tested directly
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if ($check_if_string_is_binary && !self::is_binary($str, true)) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score both byte orders with the same round-trip heuristic:
        // decode to UTF-8, encode back, decode again; only a stable round trip is scored.
        $scores = [];
        foreach (['UTF-32LE', 'UTF-32BE'] as $byte_order) {
            $scores[$byte_order] = 0;

            $test = \mb_convert_encoding($str, 'UTF-8', $byte_order);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $byte_order, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $byte_order);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and shared between both byte orders
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Iterate by value: the result of a function call is a temporary and
            // must not be bound by reference. Only the keys are needed here.
            // NOTE(review): in_array() searches the *values* of $str_chars - confirm that
            // this matches what count_chars() returns.
            foreach (self::count_chars($test3) as $test3char => $unused_count) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$scores[$byte_order];
                }
            }
        }

        $maybe_utf32le = $scores['UTF-32LE'];
        $maybe_utf32be = $scores['UTF-32BE'];

        // A tie (including 0:0) means "cannot tell", hence not UTF-32.
        if ($maybe_utf32be !== $maybe_utf32le) {
            return $maybe_utf32le > $maybe_utf32be ? 1 : 2;
        }

        return false;
    }
4041
4042
    /**
4043
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
4044
     *
4045
     * EXAMPLE: <code>
4046
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
4047
     * //
4048
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
4049
     * </code>
4050
     *
4051
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
4052
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
4053
     *
4054
     * @psalm-pure
4055
     *
4056
     * @return bool
4057
     */
4058 83
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // Iterate by value: the elements are only read, so binding them by
            // reference would merely leave a dangling reference behind.
            foreach ($str as $v) {
                // recurse, so nested arrays are handled as well; the first failure decides
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            // every element passed (an empty array passes trivially)
            return true;
        }

        // scalars / null are checked through their string representation
        return self::is_utf8_string((string) $str, $strict);
    }
4072
4073
    /**
4074
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
4075
     * Decodes a JSON string
4076
     *
4077
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
4078
     *
4079
     * @see http://php.net/manual/en/function.json-decode.php
4080
     *
4081
     * @param string $json    <p>
4082
     *                        The <i>json</i> string being decoded.
4083
     *                        </p>
4084
     *                        <p>
4085
     *                        This function only works with UTF-8 encoded strings.
4086
     *                        </p>
4087
     *                        <p>PHP implements a superset of
4088
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
4089
     *                        only supports these values when they are nested inside an array or an object.
4090
     *                        </p>
4091
     * @param bool   $assoc   [optional] <p>
4092
     *                        When <b>TRUE</b>, returned objects will be converted into
4093
     *                        associative arrays.
4094
     *                        </p>
4095
     * @param int    $depth   [optional] <p>
4096
     *                        User specified recursion depth.
4097
     *                        </p>
4098
     * @param int    $options [optional] <p>
4099
     *                        Bitmask of JSON decode options. Currently only
4100
     *                        <b>JSON_BIGINT_AS_STRING</b>
4101
     *                        is supported (default is to cast large integers as floats)
4102
     *                        </p>
4103
     *
4104
     * @psalm-pure
4105
     *
4106
     * @return mixed
4107
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
4108
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
4109
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
4110
     *               is deeper than the recursion limit.</p>
4111
     */
4112 43
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // Normalize the input before it is handed to the decoder.
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // The nesting depth is clamped to a minimum of 1.
        $safe_depth = \max(1, $depth);

        return \json_decode($filtered_json, $assoc, $safe_depth, $options);
    }
4130
4131
    /**
4132
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
4133
     * Returns the JSON representation of a value.
4134
     *
4135
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
4136
     *
4137
     * @see http://php.net/manual/en/function.json-encode.php
4138
     *
4139
     * @param mixed $value   <p>
4140
     *                       The <i>value</i> being encoded. Can be any type except
4141
     *                       a resource.
4142
     *                       </p>
4143
     *                       <p>
4144
     *                       All string data must be UTF-8 encoded.
4145
     *                       </p>
4146
     *                       <p>PHP implements a superset of
4147
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
4148
     *                       only supports these values when they are nested inside an array or an object.
4149
     *                       </p>
4150
     * @param int   $options [optional] <p>
4151
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
4152
     *                       <b>JSON_HEX_TAG</b>,
4153
     *                       <b>JSON_HEX_AMP</b>,
4154
     *                       <b>JSON_HEX_APOS</b>,
4155
     *                       <b>JSON_NUMERIC_CHECK</b>,
4156
     *                       <b>JSON_PRETTY_PRINT</b>,
4157
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
4158
     *                       <b>JSON_FORCE_OBJECT</b>,
4159
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
4160
     *                       constants is described on
4161
     *                       the JSON constants page.
4162
     *                       </p>
4163
     * @param int   $depth   [optional] <p>
4164
     *                       Set the maximum depth. Must be greater than zero.
4165
     *                       </p>
4166
     *
4167
     * @psalm-pure
4168
     *
4169
     * @return false|string
4170
     *                      A JSON encoded <strong>string</strong> on success or<br>
4171
     *                      <strong>FALSE</strong> on failure
4172
     */
4173 5
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // Normalize the input before it is handed to the encoder.
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // The nesting depth is clamped to a minimum of 1.
        $safe_depth = \max(1, $depth);

        return \json_encode($filtered_value, $options, $safe_depth);
    }
4187
4188
    /**
4189
     * Checks whether JSON is available on the server.
4190
     *
4191
     * @psalm-pure
4192
     *
4193
     * @return bool
4194
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
4195
     *
4196
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
4197
     */
4198
    public static function json_loaded(): bool
    {
        // ext-json counts as available when its decoder function is defined.
        $has_json_decoder = \function_exists('json_decode');

        return $has_json_decoder;
    }
4202
4203
    /**
4204
     * Makes string's first char lowercase.
4205
     *
4206
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
4207
     *
4208
     * @param string      $str                           <p>The input string</p>
4209
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
4210
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4211
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
4212
     *                                                   tr</p>
4213
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
4214
     *                                                   -> ß</p>
4215
     *
4216
     * @psalm-pure
4217
     *
4218
     * @return string the resulting string
4219
     */
4220 46
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Non-UTF-8 input: normalize the encoding name and use the encoding-aware helpers.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $remainder = (string) self::substr($str, 1, null, $encoding);
            $first_char = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $remainder;
        }

        // UTF-8 input: split off the first character with the native mb_* functions.
        $remainder = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // Without language- or length-specific handling the native lowercasing is used.
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($first_char) . $remainder;
        }

        return self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $remainder;
    }
4265
4266
    /**
4267
     * Lowercase for all words in the string.
4268
     *
4269
     * @param string      $str                           <p>The input string.</p>
4270
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
4271
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
4272
     *                                                   not start a new word.</p>
4273
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
4274
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4275
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
4276
     *                                                   tr</p>
4277
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
4278
     *                                                   -> ß</p>
4279
     *
4280
     * @psalm-pure
4281
     *
4282
     * @return string
4283
     */
4284 4
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare against '' explicitly: a loose check would also discard the string "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Iterate by value: the parts are only read, so no reference is needed.
        foreach ($words as $word) {
            // skip empty parts only; a "0" part is kept
            if ((string) $word === '') {
                continue;
            }

            // words listed in $exceptions are copied over untouched
            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }
4319
4320
    /**
4321
     * Calculate Levenshtein distance between two strings.
4322
     *
4323
     * For better performance, in a real application with a single input string
4324
     * matched against many strings from a database, you will probably want to pre-
4325
     * encode the input only once and use \levenshtein().
4326
     *
4327
     * Source: https://github.com/KEINOS/mb_levenshtein
4328
     * @see https://www.php.net/manual/en/function.levenshtein
4329
     *
4330
     * @param  string  $str1            <p>One of the strings being evaluated for Levenshtein distance.</p>
4331
     * @param  string  $str2            <p>One of the strings being evaluated for Levenshtein distance.</p>
4332
     * @param  integer $insertionCost   [optional] <p>Defines the cost of insertion.</p>
4333
     * @param  integer $replacementCost [optional] <p>Defines the cost of replacement.</p>
4334
     * @param  integer $deletionCost    [optional] <p>Defines the cost of deletion.</p>
4335
     *
4336
     * @return int
4337
     */
4338 5
    public static function levenshtein(
        string $str1,
        string $str2,
        int $insertionCost = 1,
        int $replacementCost = 1,
        int $deletionCost = 1
    ): int {
        // Both inputs are remapped together before the byte-based native function is applied.
        $remapped = ASCII::to_ascii_remap($str1, $str2);
        $first = $remapped[0];
        $second = $remapped[1];

        return \levenshtein($first, $second, $insertionCost, $replacementCost, $deletionCost);
    }
4349
4350
    /**
4351
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
4352
     *
4353
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
4354
     *
4355
     * @param string      $str   <p>The string to be trimmed</p>
4356
     * @param string|null $chars <p>Optional characters to be stripped</p>
4357
     *
4358
     * @psalm-pure
4359
     *
4360
     * @return string the string with unwanted characters stripped from the left
4361
     */
4362 23
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list strips nothing; without this guard the invalid
        // pattern "^[]+" would be built below.
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // The non-mbstring path additionally escapes the "/" delimiter.
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$quoted_chars}]+";
        } else {
            // default: strip leading whitespace
            $pattern = '^[\\s]+';
        }

        if ($use_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
4389
4390
    /**
4391
     * Returns the UTF-8 character with the maximum code point in the given data.
4392
     *
4393
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
4394
     *
4395
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
4396
     *
4397
     * @psalm-pure
4398
     *
4399
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
4400
     */
4401
    public static function max($arg)
    {
        // An array of strings is treated as one concatenated string.
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $code_point_list = self::codepoints($haystack);

        // Nothing to compare: signal it with null.
        if ($code_point_list === []) {
            return null;
        }

        $highest = (int) \max($code_point_list);

        // Convert the winning code point back into its character.
        return self::chr($highest);
    }
4416
4417
    /**
4418
     * Calculates and returns the maximum number of bytes taken by any
4419
     * UTF-8 encoded character in the given string.
4420
     *
4421
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
4422
     *
4423
     * @param string $str <p>The original Unicode string.</p>
4424
     *
4425
     * @psalm-pure
4426
     *
4427
     * @return int
4428
     *             <p>Max byte lengths of the given chars.</p>
4429
     */
4430
    public static function max_chr_width(string $str): int
    {
        $byte_lengths = self::chr_size_list($str);

        // An empty list yields a width of 0.
        return $byte_lengths === [] ? 0 : (int) \max($byte_lengths);
    }
4439
4440
    /**
4441
     * Checks whether mbstring is available on the server.
4442
     *
4443
     * @psalm-pure
4444
     *
4445
     * @return bool
4446
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
4447
     *
4448
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
4449
     */
4450
    public static function mbstring_loaded(): bool
    {
        // Ask the engine directly whether the extension is loaded.
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4454
4455
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point, or null if no code points were found (e.g. empty input).</p>
     */
    public static function min($arg)
    {
        // An array is glued into one string, so its elements are inspected together.
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $code_points = self::codepoints($input);

        // No code points -> nothing to compare, signalled with null.
        return $code_points === []
            ? null
            : self::chr((int) \min($code_points));
    }
4482
4483
    /**
     * Normalize the encoding-"name" input.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)</p>
     *
     * @template TNormalizeEncodingFallback
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
     * @phpstan-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // Nothing usable was given: an empty name, or '0' / '1' which is
        // only a fallback for non "strict_types" usage.
        if ($encoding === '' || $encoding === '0' || $encoding === '1') {
            return $fallback;
        }

        // Fast paths for the most common spellings (exact, case-sensitive matches).
        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (\in_array($encoding, ['ISO', 'ISO-8859-1'], true)) {
            return 'ISO-8859-1';
        }

        // Results of the slow path below are remembered per input spelling.
        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // A name found in the encodings list is returned unchanged.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // Alias lookup: upper-case the name and drop everything except [a-zA-Z0-9],
        // so that e.g. "latin-1" becomes "LATIN1".
        $normalized = \strtoupper($encoding);
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $normalized);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Unknown names fall through as their upper-cased spelling.
        $normalized = $aliases[$alias_key] ?? $normalized;

        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4648
4649
    /**
     * Standardize line endings ("\r\n", "\r" and "\n") to a single style, unix-like by default.
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. An array is applied positionally to ["\r\n", "\r", "\n"], exactly like
     *                                  "str_replace()" does.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if (\is_array($replacer)) {
            // Positional replacements keep the historic, sequential "str_replace()" behaviour.
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // "strtr()" with an array works in a single pass (longest key first) and never
        // re-scans text it has just inserted. A sequential "str_replace()" would corrupt
        // replacers that themselves contain "\r" or "\n" (e.g. "\r\n" became "\r\r\n\r\n").
        return \strtr(
            $str,
            [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ]
        );
    }
4665
4666
    /**
     * Normalize some MS Word special characters.
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        // The actual work is delegated to the ASCII helper class.
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4682
4683
    /**
     * Normalize the whitespace.
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                          <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
     *                                             bidirectional text chars.</p>
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR to "\n" and LINE TABULATION to "\t".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        // The actual work is delegated to the ASCII helper class; all flags are passed through unchanged.
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls, $normalize_control_characters);

        return $normalized;
    }
4712
4713
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        // Every encoding name other than these two literals goes through the normalizer first.
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Results are memoized per (character, encoding) pair.
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // First choice: the lookup table, loaded on first use.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $intl_code = \IntlChar::ord($chr);
            // null (failure) and 0 both continue with the byte-wise fallback below
            if ($intl_code !== null && $intl_code !== 0) {
                return $CHAR_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // Look at no more than the first four bytes; "unpack" yields a 1-based list of byte values.
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // lead byte of a 4-byte sequence
            $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // lead byte of a 3-byte sequence
            $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // lead byte of a 2-byte sequence
            $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // single byte (or nothing at all): the byte value is the result
            $code = $lead;
        }

        return $CHAR_CACHE[$cache_key] = $code;
    }
4798
4799
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            return $result !== [];
        }

        // With mbstring available, the multibyte-aware parser is used instead.
        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4840
4841
    /**
     * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        // Probe with an empty pattern that carries the "u" modifier; warnings are silenced on purpose.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
4857
4858
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $step is not a number or not positive
     * @throws \RuntimeException         if $use_ctype is requested but ext-ctype is not available
     *
     * @return string[]
     *                  <p>The characters of the range; an empty array if a boundary is empty or resolves to 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // The ctype functions must always receive strings: an int argument is interpreted
        // as an ASCII code (and is deprecated since PHP 8.1), so e.g. the int 71 would not
        // be recognized as the hexadecimal digits "71".
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        $is_digit = false;
        $is_xdigit = false;

        if ($use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string)) {
            $is_digit = true;
            $start = (int) $var1;
        } elseif ($use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // anything else is treated as a character
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        // The end boundary is interpreted the same way as the start boundary.
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4951
4952
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding round always runs; with $multi_decode the rounds repeat
        // until a round no longer changes the string.
        do {
            $before_round = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_round !== $str);

        return self::fix_simple_utf8($str);
    }
5012
5013
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always compiled with the "u" (UTF-8) modifier.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // The option string 'msr' is rewritten to 'ms'
        // (presumably because "r" is not meant for PCRE here - TODO confirm).
        $modifiers = 'u' . ($options === 'msr' ? 'ms' : $options);

        // fallback: an empty (or "0") delimiter is replaced by the default one
        $boundary = ($delimiter === '' || $delimiter === '0') ? '/' : $delimiter;

        return (string) \preg_replace($boundary . $pattern . $boundary . $modifiers, $replacement, $str);
    }
5048
5049
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_length = \strlen($str);

        // Every known BOM is tested against the (possibly already shortened) start of the string.
        foreach (self::$BOM as $bom => $bom_size) {
            if (\strncmp($str, $bom, $bom_size) !== 0) {
                continue;
            }

            /** @var false|string $without_bom - needed for PhpStan (stubs error) */
            $without_bom = \substr($str, $bom_size, $remaining_length);
            if ($without_bom === false) {
                return '';
            }

            $remaining_length -= $bom_size;
            $str = (string) $without_bom;
        }

        return $str;
    }
5084
5085
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what is collapsed into a single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_array($what)) {
            // neither a string nor an array: nothing to search for
            return $str;
        }

        foreach ($what as $item) {
            // The replacement is the captured group instead of the raw $item, because
            // "preg_replace()" interprets "$n" / "\n" / "\\" inside a replacement string,
            // e.g. an item like '$0' would otherwise be replaced by the whole match
            // and never be de-duplicated.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
5115
5116
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
     *                               should not be stripped. Default: '' (empty string)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $without_tags = \strip_tags($str, $allowable_tags);

        return $without_tags;
    }
5133
5134
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced by a single $replacement.
     *
     * EXAMPLE: <code>UTF8::remove_html_breaks("foo<br />\r\nbar", ' '); // 'foo  bar'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is an empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // - "\r\n" is listed first, so the pair is consumed as one break
        //   (the previous pattern started with a stray "/" and therefore only matched
        //   "/\r\n", swallowing a real slash and replacing plain "\r\n" twice)
        // - "<br\b[^>]*>" matches <br>, <br/>, <br class="x"> ... but not other tags
        //   that merely start with "br" (e.g. <brand>)
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
5149
5150
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        // The actual work is delegated to the ASCII helper class; all options are passed through unchanged.
        $visible_only = ASCII::remove_invisible_characters($str, $url_encoded, $replacement, $keep_basic_control_characters);

        return $visible_only;
    }
5187
5188
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_left('κόσμε-foo', 'κόσμε'); // '-foo'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a truthiness check would treat the
        // prefix "0" as "no prefix given" and leave the string untouched.
        // "strncmp()" only looks at the first bytes instead of searching the whole string.
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5229
5230
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_right('foo-κόσμε', 'κόσμε'); // 'foo-'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a truthiness check would treat the
        // suffix "0" as "no suffix given" and leave the string untouched.
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5268
5269
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            // case-insensitive matching is handled by the class' own helper
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5294
5295
    /**
     * Replaces every occurrence of each element of $search inside $str.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string (or list of strings) to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // The case-insensitive variant is delegated to the class' own helper.
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5320
5321
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * Invalid byte sequences are first turned into U+FFFD and then replaced together
     * with the U+FFFD characters that were already part of the input, so both kinds
     * always end up as exactly $replacement_char (also for multi-byte replacements).
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();

            try {
                // Use U+FFFD as the substitute instead of \ord($replacement_char):
                // \ord() only yields the first byte of a multi-byte replacement,
                // which is not the code point "mb_substitute_character()" expects.
                \mb_substitute_character(0xFFFD);

                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the global substitute character
                \mb_substitute_character($save);
            }
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
5380
5381
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped; an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty char list would build the invalid pattern "[]+$" and the
        // failed replace would be cast to '' - so there is nothing to strip.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $pattern = '[' . \preg_quote($chars) . ']+$';
            } else {
                $pattern = '[\\s]+$';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            // plain concatenation instead of the "${var}" interpolation (deprecated since PHP 8.2)
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
5421
5422
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the internal
     * support table. The block is always returned and additionally echoed when
     * $useEcho is true (so this method is not side-effect free).
     *
     * @param bool $useEcho <p>Set to <strong>false</strong> to only return the HTML.</p>
     *
     * @return string
     *                <p>The generated HTML.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $html = '<pre>';

        // iterate by value: nothing is modified, and a by-reference loop over
        // the static property would leave a dangling reference behind
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5448
5449
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched, but only on request
        $pass_through = $keep_ascii_chars && ASCII::is_ascii($char);

        return $pass_through
            ? $char
            : '&#' . self::ord($char, $encoding) . ';';
    }
5482
5483
    /**
     * Replaces every run of $tab_length spaces in the string with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5503
5504
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_strtoupper()" is only used when no language specific
        // handling and no length preservation was requested
        $native_upper = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-case helper shared by both passes below.
         *
         * @param string $text
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $text, bool $clean) use ($native_upper, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$native_upper) {
                return self::strtoupper($text, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($text)
                : \mb_strtoupper($text, $encoding);
        };

        // pass 1: drop separator runs and upper-case the character that follows them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // pass 2: upper-case each digit run together with the character that follows it
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5597
5598
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);

        // first handle the space separated parts, then the dash separated ones
        $by_space = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($by_space, '-');
    }
5619
5620
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        if (\PHP_VERSION_ID < 80000) {
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
5650
5651
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles list or an empty needle always
     * results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains all $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // check for '' explicitly, a truthiness test would also reject the needle "0"
            if ($needle === '') {
                return false;
            }

            // exactly one search per needle, depending on the requested mode
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5688
5689
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped; an empty $haystack or an empty $needles list
     * always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains any of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // check for '' explicitly, a truthiness test would also skip the needle "0"
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5732
5733
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is "str_delimit()" with '-' as the delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5749
5750
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // both regex steps use "mb_ereg_replace()" with native mbstring support and "preg_replace()" otherwise
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: put a dash in front of every uppercase letter that is not at a word boundary
        $str = $has_mbstring
            ? (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed)
            : (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);

        // step 2: lowercase, via the plain mb-function if no special handling was requested
        if ($lang === null && !$try_to_keep_the_string_length && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: collapse every run of dashes, underscores and whitespace into the delimiter
        if ($has_mbstring) {
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5801
5802
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * The checks run in this order: binary data (UTF-32 / UTF-16), ASCII, UTF-8,
     * "mb_detect_encoding()" and finally an "iconv()" round-trip per known encoding.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, !self::string_has_bom($str))) {
            // result code => encoding name (1 = little endian, 2 = big endian)
            $utf32_names = [1 => 'UTF-32LE', 2 => 'UTF-32BE'];
            $utf16_names = [1 => 'UTF-16LE', 2 => 'UTF-16BE'];

            $utf32_code = self::is_utf32($str, false);
            if (\is_int($utf32_code) && isset($utf32_names[$utf32_code])) {
                return $utf32_names[$utf32_code];
            }

            $utf16_code = self::is_utf16($str, false);
            if (\is_int($utf16_code) && isset($utf16_names[$utf16_code])) {
                return $utf16_names[$utf16_code];
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5932
5933
    /**
     * Check if the string ends with the given substring.
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * An empty $needle always matches, even for an empty $haystack.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // byte-wise comparison of the tail of the haystack
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
5965
5966
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty substring matches every string (same as "str_ends_with()")
     * - an empty $substrings list never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with any of the $substrings.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $substring) {
            // "substr($str, -0)" is the whole string, so the empty suffix
            // needs its own check to be recognized as a match
            if (
                $substring === ''
                ||
                \substr($str, -\strlen($substring)) === $substring
            ) {
                return true;
            }
        }

        return false;
    }
5993
5994
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * The prefix check is case-sensitive and byte-wise.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $already_prefixed = $substring !== ''
                            &&
                            \strncmp($str, $substring, \strlen($substring)) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
6017
6018
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * The suffix check is case-sensitive and byte-wise.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $already_suffixed = $str !== ''
                            &&
                            $substring !== ''
                            &&
                            \substr($str, -\strlen($substring)) === $substring;

        if ($already_suffixed) {
            return $str;
        }

        return $str . $substring;
    }
6042
6043
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * Every occurrence of '_id' is removed before the remaining underscores
     * are turned into spaces.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        $without_id = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($spaced));
    }
6069
6070
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * An empty $needle always matches, even for an empty $haystack.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // the tail is cut by the byte length of the needle
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6097
6098
    /**
6099
     * Returns true if the string ends with any of $substrings, false otherwise.
6100
     *
6101
     * - case-insensitive
6102
     *
6103
     * @param string   $str        <p>The input string.</p>
6104
     * @param string[] $substrings <p>Substrings to look for.</p>
6105
     *
6106
     * @psalm-pure
6107
     *
6108
     * @return bool
6109
     *              <p>Whether or not $str ends with $substring.</p>
6110
     */
6111
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: nothing is modified here, so a by-reference loop would
        // only force a copy of the array and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        // No candidate matched (this is also the result for an empty list).
        return false;
    }
6125
6126
    /**
6127
     * Inserts $substring into the string at the $index provided.
6128
     *
6129
     * @param string $str       <p>The input string.</p>
6130
     * @param string $substring <p>String to be inserted.</p>
6131
     * @param int    $index     <p>The index at which to insert the substring.</p>
6132
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
6133
     *
6134
     * @psalm-pure
6135
     *
6136
     * @return string
6137
     */
6138
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // Generic path: every length / slice operation uses the normalized encoding.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $char_count = (int) self::strlen($str, $encoding);
            if ($index > $char_count) {
                // The index lies behind the end of the string: nothing is inserted.
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $char_count, $encoding);

            return $head . $substring . $tail;
        }

        // UTF-8 path: rely on the native mbstring functions.
        $char_count = (int) \mb_strlen($str);
        if ($index > $char_count) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $char_count);

        return $head . $substring . $tail;
    }
6167
6168
    /**
6169
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
6170
     *
6171
     * EXAMPLE: <code>
6172
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
6173
     * </code>
6174
     *
6175
     * @see http://php.net/manual/en/function.str-ireplace.php
6176
     *
6177
     * @param string|string[] $search      <p>
6178
     *                                     Every replacement with search array is
6179
     *                                     performed on the result of previous replacement.
6180
     *                                     </p>
6181
     * @param string|string[] $replacement <p>The replacement.</p>
6182
     * @param string|string[] $subject     <p>
6183
     *                                     If subject is an array, then the search and
6184
     *                                     replace is performed with every entry of
6185
     *                                     subject, and the return value is an array as
6186
     *                                     well.
6187
     *                                     </p>
6188
     * @param int             $count       [optional] <p>
6189
     *                                     The number of matched and replaced needles will
6190
     *                                     be returned in count which is passed by
6191
     *                                     reference.
6192
     *                                     </p>
6193
     *
6194
     * @psalm-pure
6195
     *
6196
     * @return string|string[]
6197
     *                         <p>A string or an array of replacements.</p>
6198
     *
6199
     * @template TStrIReplaceSubject
6200
     * @phpstan-param TStrIReplaceSubject $subject
6201
     * @phpstan-return TStrIReplaceSubject
6202
     */
6203
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Build one case-insensitive, UTF-8 aware pattern per search term. An empty
        // term is mapped to a pattern that can never match (the start of the string
        // is never preceded by a character), so empty needles are simply ignored.
        $patterns = \array_map(
            static function ($term): string {
                $term = (string) $term;

                return $term === ''
                    ? '/^(?<=.)$/'
                    : '/' . \preg_quote($term, '/') . '/ui';
            },
            (array) $search
        );

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }

        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // preg_replace() expands "\n", "$n" and "${n}" inside the replacement and
        // un-doubles backslashes. str_replace() semantics require the replacement
        // to be inserted literally, so "\" and "$" are escaped beforehand.
        $escape_replacement = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacement = \is_array($replacement)
            ? \array_map($escape_replacement, $replacement)
            : $escape_replacement($replacement);

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6235
6236
    /**
6237
     * Replaces $search from the beginning of string with $replacement.
6238
     *
6239
     * @param string $str         <p>The input string.</p>
6240
     * @param string $search      <p>The string to search for.</p>
6241
     * @param string $replacement <p>The replacement.</p>
6242
     *
6243
     * @psalm-pure
6244
     *
6245
     * @return string
6246
     *                <p>The string after the replacement.</p>
6247
     */
6248
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        // An empty search string matches nothing in particular: the replacement
        // is appended (historic behavior, kept for backward compatibility).
        if ($search === '') {
            return $str . $replacement;
        }

        // Fast path: byte-wise comparison, which only folds the case of ASCII letters.
        $search_bytes = \strlen($search);
        if (\strncasecmp($str, $search, $search_bytes) === 0) {
            return $replacement . \substr($str, $search_bytes);
        }

        // Unicode path: compare the leading characters after lower-casing, so that
        // e.g. "ÖL" matches "öl". Only attempted for valid UTF-8, because mbstring
        // substitutes invalid bytes, which could produce false matches. A match
        // requires both sides to have the same number of code points.
        if (
            \mb_check_encoding($str, 'UTF-8')
            &&
            \mb_check_encoding($search, 'UTF-8')
        ) {
            $prefix = (string) \mb_substr($str, 0, (int) \mb_strlen($search, 'UTF-8'), 'UTF-8');

            if (
                $prefix !== ''
                &&
                \mb_strtolower($prefix, 'UTF-8') === \mb_strtolower($search, 'UTF-8')
            ) {
                // Cut by the byte length of the matched prefix, which may differ
                // from the byte length of $search.
                return $replacement . \substr($str, \strlen($prefix));
            }
        }

        return $str;
    }
6271
6272
    /**
6273
     * Replaces $search from the ending of string with $replacement.
6274
     *
6275
     * @param string $str         <p>The input string.</p>
6276
     * @param string $search      <p>The string to search for.</p>
6277
     * @param string $replacement <p>The replacement.</p>
6278
     *
6279
     * @psalm-pure
6280
     *
6281
     * @return string
6282
     *                <p>The string after the replacement.</p>
6283
     */
6284
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        // An empty search string matches nothing in particular: the replacement
        // is simply appended.
        if ($search === '') {
            return $str . $replacement;
        }

        $search_bytes = \strlen($search);

        // Fast path: byte-wise comparison of the trailing bytes (ASCII case folding).
        // The length guard comes first: a search string longer than the input can
        // never match, and using the length difference as a negative stripos()
        // offset raises a ValueError on PHP 8 once it points before the string.
        if (
            $search_bytes <= \strlen($str)
            &&
            \strcasecmp(\substr($str, -$search_bytes), $search) === 0
        ) {
            return \substr($str, 0, -$search_bytes) . $replacement;
        }

        // Unicode path: compare the trailing characters after lower-casing, so that
        // e.g. "ÖL" matches "öl". Only attempted for valid UTF-8, because mbstring
        // substitutes invalid bytes, which could produce false matches.
        if (
            \mb_check_encoding($str, 'UTF-8')
            &&
            \mb_check_encoding($search, 'UTF-8')
        ) {
            $suffix = (string) \mb_substr($str, -((int) \mb_strlen($search, 'UTF-8')), null, 'UTF-8');

            if (
                $suffix !== ''
                &&
                \mb_strtolower($suffix, 'UTF-8') === \mb_strtolower($search, 'UTF-8')
            ) {
                // Cut by the byte length of the matched suffix, which may differ
                // from the byte length of $search.
                return \substr($str, 0, \strlen($str) - \strlen($suffix)) . $replacement;
            }
        }

        return $str;
    }
6306
6307
    /**
6308
     * Check if the string starts with the given substring, case-insensitive.
6309
     *
6310
     * EXAMPLE: <code>
6311
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
6312
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
6313
     * </code>
6314
     *
6315
     * @param string $haystack <p>The string to search in.</p>
6316
     * @param string $needle   <p>The substring to search for.</p>
6317
     *
6318
     * @psalm-pure
6319
     *
6320
     * @return bool
6321
     */
6322
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // An empty needle matches everything; otherwise an empty haystack
        // cannot start with a non-empty needle.
        if ($needle === '' || $haystack === '') {
            return $needle === '';
        }

        // The haystack starts with the needle when the first case-insensitive
        // occurrence is found at position 0.
        $first_position = self::stripos($haystack, $needle);

        return $first_position === 0;
    }
6334
6335
    /**
6336
     * Returns true if the string begins with any of $substrings, false otherwise.
6337
     *
6338
     * - case-insensitive
6339
     *
6340
     * @param string $str        <p>The input string.</p>
6341
     * @param array  $substrings <p>Substrings to look for.</p>
6342
     *
6343
     * @psalm-pure
6344
     *
6345
     * @return bool
6346
     *              <p>Whether or not $str starts with $substring.</p>
6347
     */
6348
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // An empty input string never counts as a match here.
        if ($str === '') {
            return false;
        }

        // Iterate by value: nothing is modified here, so a by-reference loop would
        // only force a copy of the array and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        // No candidate matched (this is also the result for an empty list).
        return false;
    }
6366
6367
    /**
6368
     * Gets the substring after the first occurrence of a separator.
6369
     *
6370
     * @param string $str       <p>The input string.</p>
6371
     * @param string $separator <p>The string separator.</p>
6372
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6373
     *
6374
     * @psalm-pure
6375
     *
6376
     * @return string
6377
     */
6378
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search in the same encoding that is used for slicing below, otherwise the
        // character offset may not line up for non-UTF-8 input. Passing "0, $encoding"
        // mirrors the strripos() call in str_isubstr_before_last_separator().
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Skip the separator itself and return everything behind it.
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6406
6407
    /**
6408
     * Gets the substring after the last occurrence of a separator.
6409
     *
6410
     * @param string $str       <p>The input string.</p>
6411
     * @param string $separator <p>The string separator.</p>
6412
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6413
     *
6414
     * @psalm-pure
6415
     *
6416
     * @return string
6417
     */
6418
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search in the same encoding that is used for slicing below, otherwise the
        // character offset may not line up for non-UTF-8 input.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Skip the separator itself (measured in $encoding) and return the rest.
        $start = $offset + (int) self::strlen($separator, $encoding);

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, $start);
        }

        return (string) self::substr($str, $start, null, $encoding);
    }
6446
6447
    /**
6448
     * Gets the substring before the first occurrence of a separator.
6449
     *
6450
     * @param string $str       <p>The input string.</p>
6451
     * @param string $separator <p>The string separator.</p>
6452
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6453
     *
6454
     * @psalm-pure
6455
     *
6456
     * @return string
6457
     */
6458
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search in the same encoding that is used for slicing below, otherwise the
        // character offset may not line up for non-UTF-8 input. Passing "0, $encoding"
        // mirrors the strripos() call in str_isubstr_before_last_separator().
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Everything in front of the first (case-insensitive) separator match.
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6478
6479
    /**
6480
     * Gets the substring before the last occurrence of a separator.
6481
     *
6482
     * @param string $str       <p>The input string.</p>
6483
     * @param string $separator <p>The string separator.</p>
6484
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6485
     *
6486
     * @psalm-pure
6487
     *
6488
     * @return string
6489
     */
6490
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Generic path: locate and slice via the encoding-aware helpers.
            $position = self::strripos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr($str, 0, $position, $encoding);
        }

        // UTF-8 path: last case-insensitive occurrence via mbstring.
        $position = \mb_strripos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
6515
6516
    /**
6517
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
6518
     *
6519
     * @param string $str           <p>The input string.</p>
6520
     * @param string $needle        <p>The string to look for.</p>
6521
     * @param bool   $before_needle [optional] <p>Default: false</p>
6522
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6523
     *
6524
     * @psalm-pure
6525
     *
6526
     * @return string
6527
     */
6528
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing can be extracted without both a haystack and a needle.
        if ($str === '' || $needle === '') {
            return '';
        }

        // Delegate to the case-insensitive strstr() variant of this class.
        $part = self::stristr($str, $needle, $before_needle, $encoding);

        // A missing needle is reported as an empty string instead of false.
        return $part === false ? '' : $part;
    }
6554
6555
    /**
6556
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
6557
     *
6558
     * @param string $str           <p>The input string.</p>
6559
     * @param string $needle        <p>The string to look for.</p>
6560
     * @param bool   $before_needle [optional] <p>Default: false</p>
6561
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6562
     *
6563
     * @psalm-pure
6564
     *
6565
     * @return string
6566
     */
6567
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing can be extracted without both a haystack and a needle.
        if ($str === '' || $needle === '') {
            return '';
        }

        // Delegate to the case-insensitive strrchr() variant of this class.
        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        // A missing needle is reported as an empty string instead of false.
        return $part === false ? '' : $part;
    }
6593
6594
    /**
6595
     * Returns the last $n characters of the string.
6596
     *
6597
     * @param string $str      <p>The input string.</p>
6598
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
6599
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6600
     *
6601
     * @psalm-pure
6602
     *
6603
     * @return string
6604
     */
6605
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // No characters requested or none available.
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Generic path: slice with the normalized encoding.
            $normalized = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized);
        }

        // A negative start counts from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
6622
6623
    /**
6624
     * Limit the number of characters in a string.
6625
     *
6626
     * @param string $str        <p>The input string.</p>
6627
     * @param int    $length     [optional] <p>Default: 100</p>
6628
     * @param string $str_add_on [optional] <p>Default: …</p>
6629
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6630
     *
6631
     * @psalm-pure
6632
     *
6633
     * @return string
6634
     */
6635
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough: return the input untouched (no add-on).
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Reserve room for the add-on. Clamp at zero: a negative length would
            // make mb_substr() trim from the END of the string and return a result
            // that is longer than the requested limit.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the string itself.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6662
6663
    /**
6664
     * Limit the number of characters in a string, but also after the next word.
6665
     *
6666
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
6667
     *
6668
     * @param string $str        <p>The input string.</p>
6669
     * @param int    $length     [optional] <p>Default: 100</p>
6670
     * @param string $str_add_on [optional] <p>Default: …</p>
6671
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6672
     *
6673
     * @psalm-pure
6674
     *
6675
     * @return string
6676
     */
6677
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough: nothing to cut, no add-on.
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The cut position is a space: drop it and stop right there.
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $truncated = \mb_substr($str, 0, $length);

            // Drop the last (possibly incomplete) word of the truncated string.
            $whole_words = \implode(' ', \explode(' ', $truncated, -1));

            if ($whole_words === '') {
                // There is no word boundary inside the limit: hard cut instead.
                return ((string) \mb_substr($truncated, 0, $length - 1)) . $str_add_on;
            }

            return $whole_words . $str_add_on;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            // Slicing failed: only the add-on is left.
            return '' . $str_add_on;
        }

        // Drop the last (possibly incomplete) word of the truncated string.
        $whole_words = \implode(' ', \explode(' ', $truncated, -1));

        if ($whole_words === '') {
            // There is no word boundary inside the limit: hard cut instead.
            return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $str_add_on;
        }

        return $whole_words . $str_add_on;
    }
6729
6730
    /**
6731
     * Returns the longest common prefix between the $str1 and $str2.
6732
     *
6733
     * @param string $str1     <p>The input string.</p>
6734
     * @param string $str2     <p>Second string for comparison.</p>
6735
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6736
     *
6737
     * @psalm-pure
6738
     *
6739
     * @return string
6740
     */
6741
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            // Generic path: character access via the encoding-aware helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );

            for ($pos = 0; $pos < $limit; ++$pos) {
                $char = self::substr($str1, $pos, 1, $encoding);

                // Stop at the first position where the characters differ.
                if ($char === false || $char !== self::substr($str2, $pos, 1, $encoding)) {
                    break;
                }

                $prefix .= $char;
            }

            return $prefix;
        }

        // UTF-8 path: only the length of the shorter string has to be inspected.
        $limit = (int) \min(
            \mb_strlen($str1),
            \mb_strlen($str2)
        );

        for ($pos = 0; $pos < $limit; ++$pos) {
            $char = \mb_substr($str1, $pos, 1);

            // Stop at the first position where the characters differ.
            if ($char === false || $char !== \mb_substr($str2, $pos, 1)) {
                break;
            }

            $prefix .= $char;
        }

        return $prefix;
    }
6793
6794
    /**
6795
     * Returns the longest common substring between the $str1 and $str2.
6796
     * In the case of ties, it returns that which occurs first.
6797
     *
6798
     * @param string $str1
6799
     * @param string $str2     <p>Second string for comparison.</p>
6800
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6801
     *
6802
     * @psalm-pure
6803
     *
6804
     * @return string
6805
     *                <p>The longest common substring of $str1 and $str2.</p>
6806
     */
6807
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Decided after normalization, so an alias that resolves to "UTF-8" also
        // takes the native mbstring route.
        $use_mb = $encoding === 'UTF-8';

        // Extract every character exactly once. Doing this inside the nested loops
        // would repeat a multi-byte slice for every cell of the table.
        $char_at = $use_mb
            ? static function (string $string, int $position) {
                return \mb_substr($string, $position, 1);
            }
            : static function (string $string, int $position) use ($encoding) {
                return self::substr($string, $position, 1, $encoding);
            };

        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $char_at($str1, $i);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $char_at($str2, $j);
        }

        // $len: length of the best match found so far,
        // $end: 1-based position in $str1 at which that match ends.
        $len = 0;
        $end = 0;

        // Only the previous row of the table is ever read, so two rolling rows
        // replace the full ($str_length + 1) x ($other_length + 1) matrix.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $current_row = [0];
            $str_char = $str_chars[$i - 1];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    // Extend the common run that ended at the previous characters.
                    $run = $previous_row[$j - 1] + 1;

                    // Strictly greater: in the case of ties the first match wins.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6884
6885
    /**
6886
     * Returns the longest common suffix between the $str1 and $str2.
6887
     *
6888
     * @param string $str1
6889
     * @param string $str2     <p>Second string for comparison.</p>
6890
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6891
     *
6892
     * @psalm-pure
6893
     *
6894
     * @return string
6895
     */
6896
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding !== 'UTF-8') {
            // Generic path: character access via the encoding-aware helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );

            // Walk backwards from the end of both strings.
            for ($back = 1; $back <= $limit; ++$back) {
                $char = self::substr($str1, -$back, 1, $encoding);

                // Stop at the first position where the characters differ.
                if ($char === false || $char !== self::substr($str2, -$back, 1, $encoding)) {
                    break;
                }

                $suffix = $char . $suffix;
            }

            return $suffix;
        }

        // UTF-8 path: only the length of the shorter string has to be inspected.
        $limit = (int) \min(
            \mb_strlen($str1, $encoding),
            \mb_strlen($str2, $encoding)
        );

        // Walk backwards from the end of both strings.
        for ($back = 1; $back <= $limit; ++$back) {
            $char = \mb_substr($str1, -$back, 1);

            // Stop at the first position where the characters differ.
            if ($char === false || $char !== \mb_substr($str2, -$back, 1)) {
                break;
            }

            $suffix = $char . $suffix;
        }

        return $suffix;
    }
6951
6952
    /**
6953
     * Returns true if $str matches the supplied pattern, false otherwise.
6954
     *
6955
     * @param string $str     <p>The input string.</p>
6956
     * @param string $pattern <p>Regex pattern to match against.</p>
6957
     *
6958
     * @psalm-pure
6959
     *
6960
     * @return bool
6961
     *              <p>Whether or not $str matches the pattern.</p>
6962
     */
6963
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // The pattern is wrapped in "/" delimiters and evaluated in UTF-8 mode.
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on failure.
        return \preg_match($regex, $str) === 1;
    }
6967
6968
    /**
6969
     * Returns whether or not a character exists at an index. Offsets may be
6970
     * negative to count from the last character in the string. Implements
6971
     * part of the ArrayAccess interface.
6972
     *
6973
     * @param string $str      <p>The input string.</p>
6974
     * @param int    $offset   <p>The index to check.</p>
6975
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6976
     *
6977
     * @psalm-pure
6978
     *
6979
     * @return bool
6980
     *              <p>Whether or not the index exists.</p>
6981
     */
6982
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Non-negative offsets are zero-based from the start; negative offsets
        // count backwards from the end, so -1 addresses the last character.
        return $offset >= 0
            ? $char_count > $offset
            : $char_count >= \abs($offset);
    }
6993
6994
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check must use the same encoding as the lookup below,
        // otherwise the length it validates against can differ from the one
        // char_at() works with. str_offset_exists() applies exactly the rules
        // needed here (0 <= index < length, or -length <= index < 0).
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7026
7027
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right', 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to do without a target length or without padding material
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // fast path: plain mbstring calls for the default encoding
        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str);

            if ($pad_length < $str_length) {
                return $str;
            }

            $missing = $pad_length - $str_length;

            if ($pad_type === \STR_PAD_BOTH) {
                // an odd remainder puts the extra character on the right side
                $left_size = (int) \floor($missing / 2);
                $right_size = (int) \ceil($missing / 2);

                $left = (string) \mb_substr(\str_repeat($pad_string, $left_size), 0, $left_size);
                $right = (string) \mb_substr(\str_repeat($pad_string, $right_size), 0, $right_size);

                return $left . $str . $right;
            }

            // repeat the pad string often enough, then cut it to the exact size
            $repeats = (int) \ceil($missing / (int) \mb_strlen($pad_string));
            $filler = (string) \mb_substr(\str_repeat($pad_string, $repeats), 0, $missing);

            // every integer other than STR_PAD_LEFT / STR_PAD_BOTH pads on the right
            return $pad_type === \STR_PAD_LEFT ? $filler . $str : $str . $filler;
        }

        // generic path: same algorithm via the encoding-aware class helpers
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $str_length = (int) self::strlen($str, $encoding);

        if ($pad_length < $str_length) {
            return $str;
        }

        $missing = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_BOTH) {
            $left_size = (int) \floor($missing / 2);
            $right_size = (int) \ceil($missing / 2);

            $left = (string) self::substr(\str_repeat($pad_string, $left_size), 0, $left_size, $encoding);
            $right = (string) self::substr(\str_repeat($pad_string, $right_size), 0, $right_size, $encoding);

            return $left . $str . $right;
        }

        $repeats = (int) \ceil($missing / (int) self::strlen($pad_string, $encoding));
        $filler = (string) self::substr(\str_repeat($pad_string, $repeats), 0, $missing, $encoding);

        return $pad_type === \STR_PAD_LEFT ? $filler . $str : $str . $filler;
    }
7194
7195
    /**
     * Pads both sides of the string until it reaches the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7223
7224
    /**
     * Pads the beginning of the string until it reaches the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7252
7253
    /**
     * Pads the end of the string until it reaches the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7281
7282
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7311
7312
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // Straight delegation to the native function; $count is filled by reference.
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7371
7372
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * Only a match that starts at the very first byte is replaced; any other
     * occurrence of $search is left untouched. An empty $search matches at the
     * beginning of every string, so $replacement is prepended in that case.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle matches at offset 0: put the replacement in FRONT of
        // the string (appending it would be the behaviour of the "ending" variant).
        if ($search === '') {
            return $replacement . $str;
        }

        $search_length = \strlen($search);

        // byte-wise prefix comparison; no multibyte handling is needed to
        // decide whether one string starts with another
        if (\strncmp($str, $search, $search_length) !== 0) {
            return $str;
        }

        // the cast guards against substr() returning false on older PHP
        // versions when nothing is left after the prefix
        return $replacement . (string) \substr($str, $search_length);
    }
7410
7411
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * Only a match that ends at the very last byte is replaced; any other
     * occurrence of $search is left untouched. An empty $search matches at the
     * end of every string, so $replacement is appended in that case.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // A needle longer than the haystack can never be a suffix. Bail out
        // before doing any offset arithmetic: a negative start offset outside
        // of the string makes the native string functions raise an error.
        if ($search_length > \strlen($str)) {
            return $str;
        }

        // compare the last $search_length bytes of $str with $search
        if (\substr_compare($str, $search, -$search_length) !== 0) {
            return $str;
        }

        // the cast guards against substr() returning false on older PHP
        // versions when the whole string is cut away
        return (string) \substr($str, 0, -$search_length) . $replacement;
    }
7448
7449
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * The subject is returned unchanged when "$search" is not found.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);

        if ($first_pos === false) {
            return $subject;
        }

        // swap exactly the matched span: it starts at $first_pos and is as long as $search
        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, $search_length);
    }
7483
7484
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * The subject is returned unchanged when "$search" is not found.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);

        if ($last_pos === false) {
            return $subject;
        }

        // swap exactly the matched span: it starts at $last_pos and is as long as $search
        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, $search_length);
    }
7517
7518
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // The plain mbstring functions are only used when the caller passed
        // exactly 'UTF-8'; every other value goes through the class helpers.
        $use_mb = ($encoding === 'UTF-8');

        if ($use_mb) {
            $char_count = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $char_count = (int) self::strlen($str, $encoding);
        }

        // shuffle the character positions, then rebuild the string in that order
        $positions = \range(0, $char_count - 1);
        \shuffle($positions);

        $shuffled = '';
        foreach ($positions as $position) {
            $char = $use_mb
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            // positions that cannot be extracted are skipped
            if ($char !== false) {
                $shuffled .= $char;
            }
        }

        return $shuffled;
    }
7565
7566
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // fast path: plain mbstring calls for the default encoding
        if ($encoding === 'UTF-8') {
            // a non-negative end at or before the start selects nothing
            if ($end !== null && $end >= 0 && $end <= $start) {
                return '';
            }

            if ($end === null) {
                // the full length is always enough to reach the end of the string
                $length = (int) \mb_strlen($str);
            } else {
                // a negative end is relative to the end of the string
                $end_index = $end < 0 ? (int) \mb_strlen($str) + $end : $end;
                $length = $end_index - $start;
            }

            return \mb_substr($str, $start, $length);
        }

        // generic path: same logic via the encoding-aware class helpers
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null) {
            $length = (int) self::strlen($str, $encoding);
        } else {
            $end_index = $end < 0 ? (int) self::strlen($str, $encoding) + $end : $end;
            $length = $end_index - $start;
        }

        return self::substr($str, $start, $length, $encoding);
    }
7617
7618
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized and "-" becomes "_"; every number or
     * uppercase letter is then prefixed with "_" (uppercase letters are
     * lower-cased, plain decimal digits are additionally followed by "_");
     * finally whitespace runs become "_", repeated "_" are collapsed and
     * leading / trailing "_" are trimmed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // Numbers and uppercase letters only. A "|" inside a character class
            // is a literal pipe, not an alternation, so it must not appear here.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // a plain decimal digit survives the int round trip: isolate it on both sides
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // Once every whitespace run has been turned into "_" there is no
        // whitespace left to trim, so no separate trim pattern is needed here.
        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7687
7688
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        /** @var int[] $code_points */
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice collapses duplicate values, since array keys are unique
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        // turn the ordered code points back into a string
        return self::string($code_points);
    }
7719
7720
    /**
     * Applies "UTF8::str_split()" to every element of an array.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // array_map() with a single array keeps the original keys and order
        /** @var string[][] $chunks */
        $chunks = \array_map(
            static function ($element) use ($length, $clean_utf8, $try_to_use_mb_functions) {
                return self::str_split(
                    $element,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            },
            $input
        );

        return $chunks;
    }
7758
7759
    /**
7760
     * Convert a string to an array of unicode characters.
7761
     *
7762
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
7763
     *
7764
     * @param int|string $input                   <p>The string or int to split into array.</p>
7765
     * @param int        $length                  [optional] <p>Max character length of each array
7766
     *                                            element.</p>
7767
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
7768
     *                                            string.</p>
7769
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
7770
     *                                            "mb_substr"</p>
7771
     *
7772
     * @psalm-pure
7773
     *
7774
     * @return string[]
7775
     *                  <p>An array containing chunks of chars from the input.</p>
7776
     */
7777
    public static function str_split(
7778
        $input,
7779
        int $length = 1,
7780
        bool $clean_utf8 = false,
7781
        bool $try_to_use_mb_functions = true
7782
    ): array {
7783 90
        if ($length <= 0) {
7784 3
            return [];
7785
        }
7786
7787
        // this is only an old fallback
7788
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
7789
        /** @var int|int[]|string|string[] $input */
7790 89
        $input = $input;
7791 89
        if (\is_array($input)) {
7792
            /** @psalm-suppress InvalidReturnStatement */
7793
            /** @phpstan-ignore-next-line - old code :/ */
7794
            return self::str_split_array(
0 ignored issues
show
Bug Best Practice introduced by
The expression return self::str_split_a...ry_to_use_mb_functions) returns the type array<mixed,string[]> which is incompatible with the documented return type string[].
Loading history...
7795
                $input,
7796
                $length,
7797
                $clean_utf8,
7798
                $try_to_use_mb_functions
7799
            );
7800
        }
7801
7802
        // init
7803 89
        $input = (string) $input;
7804
7805 89
        if ($input === '') {
7806 14
            return [];
7807
        }
7808
7809 86
        if ($clean_utf8) {
7810 19
            $input = self::clean($input);
7811
        }
7812
7813
        if (
7814 86
            $try_to_use_mb_functions
7815
            &&
7816 86
            self::$SUPPORT['mbstring'] === true
7817
        ) {
7818 82
            if (\function_exists('mb_str_split')) {
7819
                /**
7820
                 * @psalm-suppress ImpureFunctionCall - why?
7821
                 */
7822 82
                $return = \mb_str_split($input, $length);
7823 82
                if ($return !== false) {
7824 82
                    return $return;
7825
                }
7826
            }
7827
7828
            $i_max = \mb_strlen($input);
7829
            if ($i_max <= 127) {
7830
                $ret = [];
7831
                for ($i = 0; $i < $i_max; ++$i) {
7832
                    $ret[] = \mb_substr($input, $i, 1);
7833
                }
7834
            } else {
7835
                $return_array = [];
7836
                \preg_match_all('/./us', $input, $return_array);
7837
                $ret = $return_array[0] ?? [];
7838
            }
7839 23
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
7840 17
            $return_array = [];
7841 17
            \preg_match_all('/./us', $input, $return_array);
7842 17
            $ret = $return_array[0] ?? [];
7843
        } else {
7844
7845
            // fallback
7846
7847 8
            $ret = [];
7848 8
            $len = \strlen($input);
7849
7850 8
            for ($i = 0; $i < $len; ++$i) {
7851 8
                if (($input[$i] & "\x80") === "\x00") {
7852 8
                    $ret[] = $input[$i];
7853
                } elseif (
7854 8
                    isset($input[$i + 1])
7855
                    &&
7856 8
                    ($input[$i] & "\xE0") === "\xC0"
7857
                ) {
7858 4
                    if (($input[$i + 1] & "\xC0") === "\x80") {
7859 4
                        $ret[] = $input[$i] . $input[$i + 1];
7860
7861 4
                        ++$i;
7862
                    }
7863
                } elseif (
7864 6
                    isset($input[$i + 2])
7865
                    &&
7866 6
                    ($input[$i] & "\xF0") === "\xE0"
7867
                ) {
7868
                    if (
7869 6
                        ($input[$i + 1] & "\xC0") === "\x80"
7870
                        &&
7871 6
                        ($input[$i + 2] & "\xC0") === "\x80"
7872
                    ) {
7873 6
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2];
7874
7875 6
                        $i += 2;
7876
                    }
7877
                } elseif (
7878
                    isset($input[$i + 3])
7879
                    &&
7880
                    ($input[$i] & "\xF8") === "\xF0"
7881
                ) {
7882
                    if (
7883
                        ($input[$i + 1] & "\xC0") === "\x80"
7884
                        &&
7885
                        ($input[$i + 2] & "\xC0") === "\x80"
7886
                        &&
7887
                        ($input[$i + 3] & "\xC0") === "\x80"
7888
                    ) {
7889
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];
7890
7891
                        $i += 3;
7892
                    }
7893
                }
7894
            }
7895
        }
7896
7897 23
        if ($length > 1) {
7898 2
            return \array_map(
7899
                static function (array $item): string {
7900 2
                    return \implode('', $item);
7901 2
                },
7902 2
                \array_chunk($ret, $length)
7903
            );
7904
        }
7905
7906 23
        if (isset($ret[0]) && $ret[0] === '') {
7907
            return [];
7908
        }
7909
7910 23
        return $ret;
7911
    }
7912
7913
    /**
     * Splits the string at every match of the given pattern and returns the
     * resulting pieces. A positive $limit keeps only the first $limit pieces.
     *
     * NOTE: with mbstring support the pattern is handed to mb_split() as a regular
     * expression; in the fallback branch it is passed through preg_quote() first
     * and therefore matched literally.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        // a limit of zero asks for no pieces at all
        if ($limit === 0) {
            return [];
        }

        // nothing to split on: the input is the only piece
        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $pieces = \mb_split($pattern, $str);
            if ($pieces === false) {
                return [];
            }

            // negative limit === unlimited, otherwise keep the leading pieces only
            return $limit > 0 ? \array_slice($pieces, 0, $limit) : $pieces;
        }

        // ask preg_split() for one extra piece: it collects the unsplit rest, which is dropped below
        $preg_limit = $limit > 0 ? $limit + 1 : -1;

        $pieces = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $preg_limit);
        if ($pieces === false) {
            return [];
        }

        if ($preg_limit > 0 && \count($pieces) === $preg_limit) {
            \array_pop($pieces);
        }

        return $pieces;
    }
7982
7983
    /**
     * Tells whether $haystack begins with $needle (byte-exact, case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to search for; an empty needle always matches.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // every string, even an empty one, starts with the empty string
        if ($needle === '') {
            return true;
        }

        // a non-empty needle can never be the prefix of an empty haystack
        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // binary-safe comparison of the first strlen($needle) bytes
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
8015
8016
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty $substrings list always yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the $substrings.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: the items are only read, so a reference would just force
        // a copy of the array and leave a dangling reference behind
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
8047
8048
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the separator
     * does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // fast path: rely on the mb_* defaults for UTF-8
        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class' own substr() wrapper, like the other *_separator methods do,
        // instead of calling \mb_substr() directly for a non-default encoding
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8089
8090
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when the
     * separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // locate the last separator, via plain mb_* calls for UTF-8 and the class wrappers otherwise
        $last_position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($last_position === false) {
            return '';
        }

        if ($is_utf8) {
            $tail = \mb_substr($str, $last_position + (int) \mb_strlen($separator));
        } else {
            $tail = self::substr(
                $str,
                $last_position + (int) self::strlen($separator, $encoding),
                null,
                $encoding
            );
        }

        return (string) $tail;
    }
8134
8135
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when the
     * separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // non-default encoding: go through the class wrappers
            $first_position = self::strpos($str, $separator, 0, $encoding);

            return $first_position === false
                ? ''
                : (string) self::substr($str, 0, $first_position, $encoding);
        }

        $first_position = \mb_strpos($str, $separator);

        return $first_position === false
            ? ''
            : (string) \mb_substr($str, 0, $first_position);
    }
8180
8181
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when the
     * separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $last_position = \mb_strrpos($str, $separator);

            return $last_position === false
                ? ''
                : (string) \mb_substr($str, 0, $last_position);
        }

        $last_position = self::strrpos($str, $separator, 0, $encoding);
        if ($last_position === false) {
            return '';
        }

        // the encoding name is normalized only after the lookup, right before cutting the string
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $last_position, $normalized_encoding);
    }
8225
8226
    /**
     * Gets the part of the string that starts at the first occurrence of "$needle"
     * (needle included), or the part before it when "$before_needle" is true.
     *
     * An empty string is returned when $str or $needle is empty, or when the
     * needle cannot be found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mb_strstr()'s third argument defaults to false, so the flag can be passed through unchanged
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8272
8273
    /**
     * Gets the part of the string that starts at the last occurrence of "$needle"
     * (needle included), or the part before it when "$before_needle" is true.
     *
     * An empty string is returned when $str or $needle is empty, or when the
     * needle cannot be found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $found = self::strrchr($str, $needle, $before_needle, $encoding);
        } else {
            // the third argument of mb_strrchr() defaults to false, so the flag is forwarded as-is
            $found = \mb_strrchr($str, $needle, $before_needle);
        }

        if ($found === false) {
            return '';
        }

        return $found;
    }
8319
8320
    /**
     * Wraps $str in the given substring, i.e. puts it in front of and behind $str.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
8335
8336
    /**
     * Returns a (by default trimmed) string with the first letter of each word
     * capitalized and the rest of the word lower-cased.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that are neither whitespace nor listed
     * in $word_define_chars.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                           additional word separators (next to whitespace).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain mb_* functions are only sufficient without language specific rules
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Compare against null / '' explicitly: a plain truthiness check would treat
        // the perfectly valid separator "0" as "not set" and silently ignore it.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are returned exactly as they were written
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                // language aware path: lower-case the whole word, then upper-case its first char
                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8431
8432
    /**
     * Convert a string into an obfuscated string: a randomly chosen share of its
     * characters is replaced by $obfuscateChar. The result is not deterministic.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the chars to pick (0.5 === 50%); values above 1 are treated
     *                                like 1, values <= 0 leave the string untouched.</p>
     * @param string   $obfuscateChar <p>The replacement char.</p>
     * @param string[] $keepChars     <p>Chars that are never replaced; a picked keep-char still counts
     *                                towards the requested share.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscate string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // hide pre-existing obfuscate chars behind a helper char, they are restored at the very end
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);

        // Chars that equal the obfuscate char are never picked by the loop below, so the
        // target must not exceed the number of selectable chars - otherwise the loop
        // could never reach it and would spin forever (e.g. for $percent > 1).
        $charsSelectable = 0;
        foreach ($chars as $char) {
            if ($char !== $obfuscateChar) {
                ++$charsSelectable;
            }
        }

        $charsMaxChange = (int) \min(\round(\count($chars) * $percent), $charsSelectable);
        $charsCounter = 0;
        $charKeyDone = [];

        // sweep over the string until enough chars were picked, each sweep picks roughly every second open char
        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if ($charsCounter >= $charsMaxChange) {
                    break;
                }

                if (isset($charKeyDone[$charKey]) || $char === $obfuscateChar) {
                    continue;
                }

                if (\random_int(0, 100) > 50) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                // keep-chars are counted as handled, but stay readable
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
8498
8499
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. The entries are appended to the built-in list of small words
     * and become part of the regular expressions as they are (no escaping).
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex fragments for the "small words" that normally stay lower-case, plus the caller's additions
        $minor_words = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $minor_words_rx = \implode('|', $minor_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lower-case char (e.g. all caps) is lower-cased before it is processed
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // callback: upper-case the first char of capture group 1
        $ucfirst_group_one = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        // callback: keep capture group 1 and upper-case the first char of capture group 2
        $ucfirst_group_two = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $minor_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $word = static::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $word = $matches[5];
                }

                // leading and trailing underscores are kept around the word
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $minor_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $minor_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $ucfirst_group_one,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $minor_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $ucfirst_group_one,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $minor_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        return $str;
    }
8689
8690
    /**
     * Get a binary representation of a specific string: the bits of all bytes,
     * written as one number without leading zeros ("0" for an empty string).
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error (kept for backward compatibility, this implementation
     *                      always returns a string)</p>
     */
    public static function str_to_binary(string $str)
    {
        // Convert byte by byte instead of via \base_convert(): that function may lose
        // precision on large numbers, which corrupted the result for longer strings.
        $bits = '';
        $byte_count = \strlen($str);
        for ($i = 0; $i < $byte_count; ++$i) {
            $bits .= \str_pad(\decbin(\ord($str[$i])), 8, '0', \STR_PAD_LEFT);
        }

        // keep the established output format: no leading zeros, and "0" when nothing is left
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8713
8714
    /**
     * Splits a string into its lines.
     *
     * Recognized line breaks are "\r\n", "\n\r", "\r" and "\n". Every line break
     * separates two lines, so consecutive breaks produce empty lines (which can be
     * dropped again via $remove_empty_values).
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // One alternative per line-break style. The former "[\r\n]{1,2}" also matched
        // "\n\n" / "\r\r" as a single break and thereby swallowed blank lines.
        $line_break_rx = "\r\n|\n\r|\r|\n";

        if (self::$SUPPORT['mbstring'] === true) {
            $return = \mb_split($line_break_rx, $str);
        } else {
            $return = \preg_split('/' . $line_break_rx . '/u', $str);
        }

        if ($return === false) {
            return $remove_empty_values ? [] : [''];
        }

        // nothing to filter: hand back the raw lines
        if (
            $remove_short_values === null
            &&
            !$remove_empty_values
        ) {
            return $return;
        }

        return self::reduce_string_array(
            $return,
            $remove_empty_values,
            $remove_short_values
        );
    }
8753
8754
    /**
8755
     * Convert a string into an array of words.
8756
     *
8757
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
8758
     *
8759
     * @param string   $str
8760
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
8761
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
8762
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
8763
     *
8764
     * @psalm-pure
8765
     *
8766
     * @return string[]
8767
     */
8768
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // Result used both for an empty input and for a failed split.
        $fallback = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        // Character class describing what belongs to a word (letters plus $char_list).
        $word_class = self::rxClass($char_list, '\pL');

        // Split on words but keep them (PREG_SPLIT_DELIM_CAPTURE), so the result
        // alternates: non-word text, word, non-word text, word, ...
        $parts = \preg_split("/({$word_class}+(?:[\p{Pd}’']{$word_class}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $fallback;
        }

        $filter_requested = $remove_empty_values || $remove_short_values !== null;
        if (!$filter_requested) {
            return $parts;
        }

        $reduced = self::reduce_string_array($parts, $remove_empty_values, $remove_short_values);

        // Make sure every remaining entry is a string (keys are preserved).
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $reduced
        );
    }
8805
8806
    /**
8807
     * Truncates the string to a given length. If $substring is provided, and
8808
     * truncating occurs, the string is further truncated so that the substring
8809
     * may be appended without exceeding the desired length.
8810
     *
8811
     * @param string $str
8812
     * @param int    $length    <p>Desired length of the truncated string.</p>
8813
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
8814
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
8815
     *
8816
     * @psalm-pure
8817
     *
8818
     * @return string
8819
     *                <p>A string after truncating.</p>
8820
     */
8821
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        // Cuts $str down to $length characters, reserving room for $substring
        // (which is appended only when truncation actually happens).
        if ($str === '') {
            return '';
        }

        // The literal 'UTF-8' takes the direct mb_* fast path; any other value is
        // normalized first and handled by the encoding-aware wrappers of this class.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // Nothing to truncate: the string already fits.
        if ($length >= $str_length) {
            return $str;
        }

        // Reserve room for the substring that will be appended.
        if ($substring !== '') {
            $length -= $is_utf8
                ? (int) \mb_strlen($substring)
                : (int) self::strlen($substring, $encoding);
        }

        // No room left for any part of $str (substring as long as / longer than the
        // requested length, or a non-positive length). Passing a negative length on
        // to substr would cut from the END of the string and return far more than
        // requested, so return only the substring - same rule as str_truncate_safe().
        if ($length <= 0) {
            return $substring;
        }

        $truncated = $is_utf8
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);

        // The cast turns a failed substr (false) into an empty string.
        return (string) $truncated . $substring;
    }
8865
8866
    /**
8867
     * Truncates the string to a given length, while ensuring that it does not
8868
     * split words. If $substring is provided, and truncating occurs, the
8869
     * string is further truncated so that the substring may be appended without
8870
     * exceeding the desired length.
8871
     *
8872
     * @param string $str
8873
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
8874
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
8875
     *                                                       Default:
8876
     *                                                       ''</p>
8877
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
8878
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
8879
     *
8880
     * @psalm-pure
8881
     *
8882
     * @return string
8883
     *                <p>A string after truncating.</p>
8884
     */
8885
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        // The literal 'UTF-8' uses the mb_* functions directly; every other value is
        // normalized and routed through the encoding-aware wrappers of this class.
        $native = $encoding === 'UTF-8';
        if (!$native) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $total = $native
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($length >= $total) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= ($native
            ? (int) \mb_strlen($substring)
            : (int) self::strlen($substring, $encoding));
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $truncated - needed for PhpStan (stubs error) */
        $truncated = $native
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // First space at or after the last kept character. If it sits exactly at
        // $length, the cut fell on a word boundary and nothing else has to be done.
        $next_space = $native
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);
        if ($next_space === $length) {
            return $truncated . $substring;
        }

        // The last word was cut: look for the last space inside the kept part.
        $last_space = $native
            ? \mb_strrpos($truncated, ' ', 0)
            : self::strrpos($truncated, ' ', 0, $encoding);

        // The cut word is only kept when the kept part contains no space at all AND
        // either no further space follows in $str or the caller explicitly allowed it.
        $keep_cut_word = $last_space === false
                         && ($next_space === false || $ignore_do_not_split_words_for_one_word);

        if (!$keep_cut_word) {
            // (int) false === 0, i.e. a lone cut word is dropped completely.
            $cut_at = (int) $last_space;
            $truncated = (string) ($native
                ? \mb_substr($truncated, 0, $cut_at)
                : self::substr($truncated, 0, $cut_at, $encoding));
        }

        return $truncated . $substring;
    }
8972
8973
    /**
8974
     * Returns a lowercase and trimmed string separated by underscores.
8975
     * Underscores are inserted before uppercase characters (with the exception
8976
     * of the first character of the string), and in place of spaces as well as
8977
     * dashes.
8978
     *
8979
     * @param string $str
8980
     *
8981
     * @psalm-pure
8982
     *
8983
     * @return string
8984
     *                <p>The underscored string.</p>
8985
     */
8986
    public static function str_underscored(string $str): string
    {
        // Underscoring is delimiting with "_" as the separator.
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8990
8991
    /**
8992
     * Returns an UpperCamelCase version of the supplied string. It trims
8993
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
8994
     * and underscores, and removes spaces, dashes, underscores.
8995
     *
8996
     * @param string      $str                           <p>The input string.</p>
8997
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
8998
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
8999
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
9000
     *                                                   tr</p>
9001
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
9002
     *                                                   -> ß</p>
9003
     *
9004
     * @psalm-pure
9005
     *
9006
     * @return string
9007
     *                <p>A string in UpperCamelCase.</p>
9008
     */
9009
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Step 1: camelize the string.
        $camelized = self::str_camelize($str, $encoding);

        // Step 2: upper-case its first character.
        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9018
9019
    /**
9020
     * Get the number of words in a specific string.
9021
     *
9022
     * EXAMPLES: <code>
9023
     * // format: 0 -> return only word count (int)
9024
     * //
9025
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
9026
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
9027
     *
9028
     * // format: 1 -> return words (array)
9029
     * //
9030
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
9031
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
9032
     *
9033
     * // format: 2 -> return words with offset (array)
9034
     * //
9035
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
9036
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
9037
     * </code>
9038
     *
9039
     * @param string $str       <p>The input string.</p>
9040
     * @param int    $format    [optional] <p>
9041
     *                          <strong>0</strong> => return a number of words (default)<br>
9042
     *                          <strong>1</strong> => return an array of words<br>
9043
     *                          <strong>2</strong> => return an array of words with word-offset as key
9044
     *                          </p>
9045
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
9046
     *
9047
     * @psalm-pure
9048
     *
9049
     * @return int|string[]
9050
     *                      <p>The number of words in the string.</p>
9051
     */
9052
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // str_to_words() yields an alternating list: the words sit on the odd indexes
        // (1, 3, 5, ...), the text between them on the even ones.
        $parts = self::str_to_words($str, $char_list);
        $part_count = \count($parts);

        // Default format: only the number of words.
        if ($format !== 1 && $format !== 2) {
            return (int) (($part_count - 1) / 2);
        }

        $words = [];

        // Format 1: plain list of the words.
        if ($format === 1) {
            for ($idx = 1; $idx < $part_count; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // Format 2: words keyed by their offset; the offset is advanced by the
        // length of each word plus the length of the text that follows it.
        $offset = (int) self::strlen($parts[0]);
        $idx = 1;
        while ($idx < $part_count) {
            $words[$offset] = $parts[$idx];
            $offset += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
            $idx += 2;
        }

        return $words;
    }
9076
9077
    /**
9078
     * Case-insensitive string comparison.
9079
     *
9080
     * INFO: Case-insensitive version of UTF8::strcmp()
9081
     *
9082
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
9083
     *
9084
     * @param string $str1     <p>The first string.</p>
9085
     * @param string $str2     <p>The second string.</p>
9086
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9087
     *
9088
     * @psalm-pure
9089
     *
9090
     * @return int
9091
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9092
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
9093
     *             <strong>0</strong> if they are equal
9094
     */
9095
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands with identical settings ...
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        // ... and let the case-sensitive comparison decide.
        return self::strcmp($folded_first, $folded_second);
    }
9119
9120
    /**
9121
     * Case-sensitive string comparison.
9122
     *
9123
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
9124
     *
9125
     * @param string $str1 <p>The first string.</p>
9126
     * @param string $str2 <p>The second string.</p>
9127
     *
9128
     * @psalm-pure
9129
     *
9130
     * @return int
9131
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9132
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9133
     *             <strong>0</strong> if they are equal
9134
     */
9135
    public static function strcmp(string $str1, string $str2): int
    {
        // Compares two strings after bringing both into the same Unicode
        // normalization form (NFD), so equivalent spellings compare as equal.

        // Byte-identical strings are equal, no normalization needed.
        if ($str1 === $str2) {
            return 0;
        }

        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() returns false when it cannot process the input
        // (e.g. invalid UTF-8). Handing that false to \strcmp() either fails on the
        // argument type or degrades the operand to an empty string, which makes
        // different invalid strings look equal. Fall back to the raw bytes instead.
        return \strcmp(
            $normalized1 === false ? $str1 : $normalized1,
            $normalized2 === false ? $str2 : $normalized2
        );
    }
9148
9149
    /**
9150
     * Find length of initial segment not matching mask.
9151
     *
9152
     * @param string   $str
9153
     * @param string   $char_list
9154
     * @param int      $offset
9155
     * @param int|null $length
9156
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9157
     *
9158
     * @psalm-pure
9159
     *
9160
     * @return int
9161
     */
9162
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Without a mask nothing can match, so the whole string is the segment.
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // Restrict the search to the requested window first.
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $window = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $window = \mb_substr($str, $offset);
            } else {
                $window = \mb_substr($str, $offset, $length);
            }

            if ($window === false) {
                return 0;
            }

            $str = $window;
        }

        if ($str === '') {
            return 0;
        }

        // Lazily capture everything in front of the first character from the mask.
        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            // No mask character present: the complete string counts.
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
9211
9212
    /**
9213
     * Create a UTF-8 string from code points.
9214
     *
9215
     * INFO: opposite to UTF8::codepoints()
9216
     *
9217
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
9218
     *
9219
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
9220
     *
9221
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
9222
     *
9223
     * @psalm-pure
9224
     *
9225
     * @return string
9226
     *                <p>A UTF-8 encoded string.</p>
9227
     */
9228
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // Accept a single code point as well as a list of them.
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        // Write every code point as a decimal numeric character reference ("&#NNN;").
        $entities = \implode(
            '',
            \array_map(
                static function ($code_point): string {
                    return '&#' . (int) $code_point . ';';
                },
                $code_points
            )
        );

        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        // NOTE(review): the "HTML-ENTITIES" mbstring encoding is reported as deprecated
        // since PHP 8.2 - confirm and plan a replacement.
        return mb_convert_encoding($entities, 'UTF-8', 'HTML-ENTITIES');
    }
9247
9248
    /**
9249
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
9250
     *
9251
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
9252
     *
9253
     * @param string $str <p>The input string.</p>
9254
     *
9255
     * @psalm-pure
9256
     *
9257
     * @return bool
9258
     *              <p>
9259
     *              <strong>true</strong> if the string has BOM at the start,<br>
9260
     *              <strong>false</strong> otherwise
9261
     *              </p>
9262
     */
9263
    public static function string_has_bom(string $str): bool
    {
        // self::$BOM maps each known BOM byte sequence to its length in bytes;
        // the string has a BOM as soon as it starts with one of those sequences.
        foreach (self::$BOM as $bom_bytes => $bom_length) {
            if (\strncmp($str, $bom_bytes, $bom_length) !== 0) {
                continue;
            }

            return true;
        }

        return false;
    }
9273
9274
    /**
9275
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
9276
     *
9277
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
9278
     *
9279
     * @see http://php.net/manual/en/function.strip-tags.php
9280
     *
9281
     * @param string      $str            <p>
9282
     *                                    The input string.
9283
     *                                    </p>
9284
     * @param string|null $allowable_tags [optional] <p>
9285
     *                                    You can use the optional second parameter to specify tags which should
9286
     *                                    not be stripped.
9287
     *                                    </p>
9288
     *                                    <p>
9289
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
9290
     *                                    can not be changed with allowable_tags.
9291
     *                                    </p>
9292
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
9293
     *
9294
     * @psalm-pure
9295
     *
9296
     * @return string
9297
     *                <p>The stripped string.</p>
9298
     */
9299
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // Optionally clean the input before the tags are removed.
        $input = $clean_utf8 ? self::clean($str) : $str;

        // Only pass the allow-list on when the caller actually supplied one.
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9318
9319
    /**
9320
     * Strip all whitespace characters. This includes tabs and newline
9321
     * characters, as well as multibyte whitespace such as the thin space
9322
     * and ideographic space.
9323
     *
9324
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
9325
     *
9326
     * @param string $str
9327
     *
9328
     * @psalm-pure
9329
     *
9330
     * @return string
9331
     */
9332
    public static function strip_whitespace(string $str): string
    {
        if ($str !== '') {
            // Drop every run of [[:space:]] characters (pattern runs in "u" mode);
            // the cast turns a failed replacement (null) into an empty string.
            $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

            return (string) $stripped;
        }

        return '';
    }
9340
9341
    /**
9342
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
9343
     *
9344
     * INFO: use UTF8::stripos_in_byte() for the byte-length
9345
     *
9346
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
9347
     *
9348
     * @see http://php.net/manual/en/function.mb-stripos.php
9349
     *
9350
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
9351
     * @param string $needle     <p>The string to find in haystack.</p>
9352
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
9353
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9354
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9355
     *
9356
     * @psalm-pure
9357
     *
9358
     * @return false|int
9359
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
9360
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
9361
     */
9362
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Since PHP 8.0 an empty needle is a valid search term.
        $empty_needle_allowed = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            return ($empty_needle_allowed && $needle === '') ? 0 : false;
        }

        if ($needle === '' && !$empty_needle_allowed) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl: "grapheme_stripos()" can't handle other encodings
        // and can't handle a negative offset
        //

        $grapheme_usable = $encoding === 'UTF-8'
                           && $offset >= 0
                           && self::$SUPPORT['intl'] === true;
        if ($grapheme_usable) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9430
9431
    /**
9432
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
9433
     *
9434
     * EXAMPLE: <code>
9435
     * $str = 'iñtërnâtiônàlizætiøn';
9436
     * $search = 'NÂT';
9437
     *
9438
     * UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
9439
     * UTF8::stristr($str, $search, true)); // 'iñtër'
9440
     * </code>
9441
     *
9442
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
9443
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
9444
     * @param bool   $before_needle [optional] <p>
9445
     *                              If <b>TRUE</b>, it returns the part of the
9446
     *                              haystack before the first occurrence of the needle (excluding the needle).
9447
     *                              </p>
9448
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
9449
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
9450
     *
9451
     * @psalm-pure
9452
     *
9453
     * @return false|string
9454
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
9455
     */
9456
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Case-insensitive search for $needle; returns the part of $haystack from
        // the first match on (or the part before it), false when nothing matches.
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return '';
            }

            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            if (\PHP_VERSION_ID >= 80000) {
                // An empty needle matches at position 0: the part starting at the
                // match is the whole haystack, the part BEFORE the match is empty.
                // (Previously the whole haystack was returned in both cases, which
                // differs from what stristr() / mb_stristr() return on PHP 8.)
                return $before_needle ? '' : $haystack;
            }

            return false;
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via regex: lazily capture everything in front of the first
        // case-insensitive occurrence of the (quoted) needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // Everything from the match on = haystack minus the captured prefix.
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
9536
9537
    /**
9538
     * Get the string length, not the byte-length!
9539
     *
9540
     * INFO: use UTF8::strwidth() for the char-length
9541
     *
9542
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn")); // 20</code>
9543
     *
9544
     * @see http://php.net/manual/en/function.mb-strlen.php
9545
     *
9546
     * @param string $str        <p>The string being checked for length.</p>
9547
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9548
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9549
     *
9550
     * @psalm-pure
9551
     *
9552
     * @return false|int
9553
     *                   <p>
9554
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
9555
     *                   $encoding.
9556
     *                   (One multi-byte character counted as +1).
9557
     *                   <br>
9558
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
9559
     *                   chars.
9560
     *                   </p>
9561
     */
9562
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Nothing to count.
        if ($str === '') {
            return 0;
        }

        // 'UTF-8' and 'CP850' are used as given; any other name is normalized first.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // "mb_strlen" and "\iconv_strlen" return a wrong length
        // if invalid characters are found in $str, so optionally clean first.
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // 2) binary || ascii only: the byte count is returned directly
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        // Warn when a non UTF-8 encoding was requested but neither mbstring nor iconv is flagged as available.
        $no_converter = self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false;
        if ($encoding !== 'UTF-8' && $no_converter) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // 4) intl -- INFO: "grapheme_strlen()" can't handle other encodings than UTF-8
        //

        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // 5) ascii only: bytes and characters coincide
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // 6) vanilla php: count the matches of a single-character UTF-8 pattern
        //

        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        // No match at all for a non-empty string is reported as a failure.
        return $char_count === 0 ? false : $char_count;
    }
9667
9668
    /**
9669
     * Get string length in byte.
9670
     *
9671
     * @param string $str
9672
     *
9673
     * @psalm-pure
9674
     *
9675
     * @return int
9676
     */
9677
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // When mbstring function overloading is flagged, the "mb_" functions are
        // available, so the length is measured with mb_strlen() in an 8-bit encoding;
        // otherwise the native strlen() is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
9690
9691
    /**
9692
     * Case-insensitive string comparisons using a "natural order" algorithm.
9693
     *
9694
     * INFO: natural order version of UTF8::strcasecmp()
9695
     *
9696
     * EXAMPLES: <code>
9697
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
9698
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9699
     *
9700
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9701
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9702
     * </code>
9703
     *
9704
     * @param string $str1     <p>The first string.</p>
9705
     * @param string $str2     <p>The second string.</p>
9706
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9707
     *
9708
     * @psalm-pure
9709
     *
9710
     * @return int
9711
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9712
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9713
     *             <strong>0</strong> if they are equal
9714
     */
9715
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands with the same settings, then delegate to the
        // case-sensitive natural-order comparison.
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_first, $folded_second);
    }
9722
9723
    /**
9724
     * String comparisons using a "natural order" algorithm
9725
     *
9726
     * INFO: natural order version of UTF8::strcmp()
9727
     *
9728
     * EXAMPLES: <code>
9729
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
9730
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9731
     *
9732
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9733
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9734
     * </code>
9735
     *
9736
     * @see http://php.net/manual/en/function.strnatcmp.php
9737
     *
9738
     * @param string $str1 <p>The first string.</p>
9739
     * @param string $str2 <p>The second string.</p>
9740
     *
9741
     * @psalm-pure
9742
     *
9743
     * @return int
9744
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9745
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
9746
     *             <strong>0</strong> if they are equal
9747
     */
9748
    public static function strnatcmp(string $str1, string $str2): int
    {
        // Byte-identical strings are equal; skip the folding step entirely.
        if ($str1 === $str2) {
            return 0;
        }

        // Fold both sides via strtonatfold() before handing them to the native natural-order comparison.
        $left = (string) self::strtonatfold($str1);
        $right = (string) self::strtonatfold($str2);

        return \strnatcmp($left, $right);
    }
9759
9760
    /**
9761
     * Case-insensitive string comparison of the first n characters.
9762
     *
9763
     * EXAMPLE: <code>
9764
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
9765
     * </code>
9766
     *
9767
     * @see http://php.net/manual/en/function.strncasecmp.php
9768
     *
9769
     * @param string $str1     <p>The first string.</p>
9770
     * @param string $str2     <p>The second string.</p>
9771
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
9772
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9773
     *
9774
     * @psalm-pure
9775
     *
9776
     * @return int
9777
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9778
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9779
     *             <strong>0</strong> if they are equal
9780
     */
9781
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands with the same settings ...
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        // ... and let the case-sensitive variant compare the first $len characters.
        return self::strncmp($folded_first, $folded_second, $len);
    }
9793
9794
    /**
9795
     * String comparison of the first n characters.
9796
     *
9797
     * EXAMPLE: <code>
9798
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
9799
     * </code>
9800
     *
9801
     * @see http://php.net/manual/en/function.strncmp.php
9802
     *
9803
     * @param string $str1     <p>The first string.</p>
9804
     * @param string $str2     <p>The second string.</p>
9805
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
9806
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9807
     *
9808
     * @psalm-pure
9809
     *
9810
     * @return int
9811
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9812
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9813
     *             <strong>0</strong> if they are equal
9814
     */
9815
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // 'UTF-8' and 'CP850' are used as given; any other name is normalized first.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut both strings down to their first $len characters.
        if ($encoding !== 'UTF-8') {
            $head_first = (string) self::substr($str1, 0, $len, $encoding);
            $head_second = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head_first = (string) \mb_substr($str1, 0, $len);
            $head_second = (string) \mb_substr($str2, 0, $len);
        }

        // The truncated heads are compared by the regular string comparison.
        return self::strcmp($head_first, $head_second);
    }
9835
9836
    /**
9837
     * Search a string for any of a set of characters.
9838
     *
9839
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
9840
     *
9841
     * @see http://php.net/manual/en/function.strpbrk.php
9842
     *
9843
     * @param string $haystack  <p>The string where char_list is looked for.</p>
9844
     * @param string $char_list <p>This parameter is case-sensitive.</p>
9845
     *
9846
     * @psalm-pure
9847
     *
9848
     * @return false|string
9849
     *                      <p>The string starting from the character found, or false if it is not found.</p>
9850
     */
9851
    public static function strpbrk(string $haystack, string $char_list)
    {
        // Without a haystack or without candidate characters nothing can match.
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // Build a pattern from the character list and look for its first match.
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $m)) {
            return false;
        }

        // Return everything from the byte offset of the matched character onwards.
        $byte_offset = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $byte_offset);
    }
9863
9864
    /**
9865
     * Find the position of the first occurrence of a substring in a string.
9866
     *
9867
     * INFO: use UTF8::strpos_in_byte() for the byte-length
9868
     *
9869
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
9870
     *
9871
     * @see http://php.net/manual/en/function.mb-strpos.php
9872
     *
9873
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
9874
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
9875
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
9876
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9877
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9878
     *
9879
     * @psalm-pure
9880
     *
9881
     * @return false|int
9882
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
9883
     *                   string.<br> If needle is not found it returns false.
9884
     */
9885
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // iconv and mbstring do not support integer $needle:
        // an int is treated as a code point and converted to its character.
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // Empty haystack: only PHP >= 8 reports an empty needle as found (at position 0).
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        // Before PHP 8 an empty needle is never found.
        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // 'UTF-8' and 'CP850' are used as given; any other name is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        // Warn when a non UTF-8 encoding was requested but neither iconv nor mbstring is flagged as available.
        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack. Resolve it to an
        // absolute character index first, so the position returned below is relative
        // to the start of the ORIGINAL haystack (as with mb_strpos()) and not to the
        // tail that is cut off for searching. Offsets reaching before the start are
        // clamped to 0.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        // Search only in the part of the haystack that starts at $offset.
        $haystack_tail = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tail === false) {
            $haystack_tail = '';
        }
        $haystack_tail = (string) $haystack_tail;

        // Byte position of the needle inside the tail.
        $byte_pos = \strpos($haystack_tail, $needle);
        if ($byte_pos === false) {
            return false;
        }

        if ($byte_pos) {
            // Convert the byte position into a character count and shift it by the skipped characters.
            return $offset + (int) self::strlen(\substr($haystack_tail, 0, $byte_pos), $encoding);
        }

        // Needle found directly at the start of the tail.
        return $offset;
    }
10037
10038
    /**
10039
     * Find the position of the first occurrence of a substring in a string.
10040
     *
10041
     * @param string $haystack <p>
10042
     *                         The string being checked.
10043
     *                         </p>
10044
     * @param string $needle   <p>
10045
     *                         The string to search for in haystack.
10046
     *                         </p>
10047
     * @param int    $offset   [optional] <p>
10048
     *                         The search offset. If it is not specified, 0 is used.
10049
     *                         </p>
10050
     *
10051
     * @psalm-pure
10052
     *
10053
     * @return false|int
10054
     *                   <p>The numeric position of the first occurrence of needle in the
10055
     *                   haystack string. If needle is not found, it returns false.</p>
10056
     */
10057
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle is always reported as "not found".
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // When mbstring function overloading is flagged, the "mb_" functions are
        // available, so search with mb_strpos() in an 8-bit encoding;
        // otherwise the native strpos() is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
10070
10071
    /**
10072
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10073
     *
10074
     * @param string $haystack <p>
10075
     *                         The string being checked.
10076
     *                         </p>
10077
     * @param string $needle   <p>
10078
     *                         The string to search for in haystack (case-insensitive).
10079
     *                         </p>
10080
     * @param int    $offset   [optional] <p>
10081
     *                         The search offset. If it is not specified, 0 is used.
10082
     *                         </p>
10083
     *
10084
     * @psalm-pure
10085
     *
10086
     * @return false|int
10087
     *                   <p>The numeric position of the first occurrence of needle in the
10088
     *                   haystack string. If needle is not found, it returns false.</p>
10089
     */
10090
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle is always reported as "not found".
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // When mbstring function overloading is flagged, the "mb_" functions are
        // available, so search with mb_stripos() in an 8-bit encoding;
        // otherwise the native stripos() is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \stripos($haystack, $needle, $offset);
    }
10103
10104
    /**
10105
     * Find the last occurrence of a character in a string within another.
10106
     *
10107
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
10108
     *
10109
     * @see http://php.net/manual/en/function.mb-strrchr.php
10110
     *
10111
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10112
     * @param string $needle        <p>The string to find in haystack</p>
10113
     * @param bool   $before_needle [optional] <p>
10114
     *                              Determines which portion of haystack
10115
     *                              this function returns.
10116
     *                              If set to true, it returns all of haystack
10117
     *                              from the beginning to the last occurrence of needle.
10118
     *                              If set to false, it returns all of haystack
10119
     *                              from the last occurrence of needle to the end,
10120
     *                              </p>
10121
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10122
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10123
     *
10124
     * @psalm-pure
10125
     *
10126
     * @return false|string
10127
     *                      <p>The portion of haystack or false if needle is not found.</p>
10128
     */
10129
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // 'UTF-8' and 'CP850' are used as given; any other name is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrchr($haystack, $needle, $before_needle);
            }

            return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        // Cleaning may have emptied one of the strings; the fallbacks below
        // cannot search for (or in) an empty string.
        if ($haystack === '' || $needle === '') {
            return false;
        }

        //
        // fallback for binary || ascii only
        //

        if (
            !$before_needle
            &&
            (
                $encoding === 'CP850'
                ||
                $encoding === 'ASCII'
            )
        ) {
            // The native strrchr() only looks at the first byte of $needle, whereas
            // mb_strrchr() (the primary path above) matches the whole needle.
            // Locate the last occurrence of the full needle so that every backend
            // returns the same result.
            $byte_pos = \strrpos($haystack, $needle);

            return $byte_pos === false ? false : \substr($haystack, $byte_pos);
        }

        // Warn when a non UTF-8 encoding was requested but mbstring is flagged as unavailable.
        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv, otherwise via vanilla php
        //
        // The whole needle is searched (not just its first character) to stay
        // consistent with mb_strrchr().
        //

        if (self::$SUPPORT['iconv'] === true) {
            $pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($pos === false) {
            return false;
        }

        if ($before_needle) {
            // Everything in front of the last occurrence.
            return self::substr($haystack, 0, $pos, $encoding);
        }

        // The last occurrence and everything after it.
        return self::substr($haystack, $pos, null, $encoding);
    }
10234
10235
    /**
10236
     * Reverses characters order in the string.
10237
     *
10238
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
10239
     *
10240
     * @param string $str      <p>The input string.</p>
10241
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10242
     *
10243
     * @psalm-pure
10244
     *
10245
     * @return string
10246
     *                <p>The string with characters in the reverse sequence.</p>
10247
     */
10248
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Emoji are encoded before the reversal and decoded again on the result.
        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            // Other encodings: walk backwards with the encoding-aware helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($i = (int) self::strlen($str, $encoding) - 1; $i >= 0; --$i) {
                $piece = self::substr($str, $i, 1, $encoding);
                if ($piece !== false) {
                    $reversed .= $piece;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($i = (int) \grapheme_strlen($str) - 1; $i >= 0; --$i) {
                $piece = \grapheme_substr($str, $i, 1);
                if ($piece !== false) {
                    $reversed .= $piece;
                }
            }
        } else {
            // UTF-8 without intl: walk backwards one character at a time via mbstring.
            for ($i = (int) \mb_strlen($str) - 1; $i >= 0; --$i) {
                $piece = \mb_substr($str, $i, 1);
                if ($piece !== false) {
                    $reversed .= $piece;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
10292
10293
    /**
10294
     * Find the last occurrence of a character in a string within another, case-insensitive.
10295
     *
10296
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
10297
     *
10298
     * @see http://php.net/manual/en/function.mb-strrichr.php
10299
     *
10300
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10301
     * @param string $needle        <p>The string to find in haystack.</p>
10302
     * @param bool   $before_needle [optional] <p>
10303
     *                              Determines which portion of haystack
10304
     *                              this function returns.
10305
     *                              If set to true, it returns all of haystack
10306
     *                              from the beginning to the last occurrence of needle.
10307
     *                              If set to false, it returns all of haystack
10308
     *                              from the last occurrence of needle to the end,
10309
     *                              </p>
10310
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10311
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10312
     *
10313
     * @psalm-pure
10314
     *
10315
     * @return false|string
10316
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
10317
     */
10318
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // 'UTF-8' and 'CP850' are used as given; any other name is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrichr($haystack, $needle, $before_needle);
            }

            return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // Cleaning may have emptied one of the strings; nothing can be found then.
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // Search for the WHOLE needle (not only its first character), so that this
        // fallback agrees with mb_strrichr() for multi-character needles.
        $pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        if ($before_needle) {
            // Everything in front of the last occurrence.
            return self::substr($haystack, 0, $pos, $encoding);
        }

        // The last occurrence and everything after it.
        return self::substr($haystack, $pos, null, $encoding);
    }
10373
10374
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            if (!$is_php8) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // re-check the empty haystack, now that the needle is a string for sure
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strripos()" can't handle other encodings and no negative offset
        $intl_usable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings, so that the case-sensitive search can be used
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
10505
10506
    /**
     * Finds the byte position of the last occurrence of a string within another, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if one of the
     *                   strings is empty.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
10540
10541
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            if (!$is_php8) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // re-check the empty haystack, now that the needle is a string for sure
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strrpos()" can't handle other encodings and no negative offset
        $intl_usable = $offset >= 0 && $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // cut the haystack down to the part which is selected by the offset
        $sliced = null;
        if ($offset > 0) {
            $sliced = self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            $sliced = self::substr($haystack, 0, $offset);
            // the slice starts at the beginning, so nothing needs to be added to the result
            $offset = 0;
        }

        if ($sliced !== null) {
            $haystack = $sliced === false ? '' : (string) $sliced;
        }

        // byte position of the last occurrence ...
        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        // ... converted into a character position via the length of the prefix
        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
10700
10701
    /**
     * Find the byte position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found or if one of the strings
     *                   is empty, it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
10735
10736
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // '3'</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start of the examined part of the string.</p>
     * @param int|null $length   [optional] <p>Length of the examined part of the string.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The length of the matching initial segment, 0 if the (sliced) string or the mask is empty.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested part, before the mask is applied
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest prefix which is built from chars of the mask only
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
10783
10784
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // an empty needle matches the whole haystack since PHP 8, before it was an error case
        if ($needle === '') {
            return $is_php8 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if (self::$SUPPORT['intl'] === true && $encoding === 'UTF-8') {
            $via_intl = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($via_intl !== false) {
                return $via_intl;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        $found = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $prefix = $found[1];

        return $before_needle
            ? $prefix
            : self::substr($haystack, (int) self::strlen($prefix));
    }
10917
10918
    /**
     * Finds first occurrence of a string within another (byte-wise).
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found or if one of the strings is empty.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
10959
10960
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences, before the case is changed
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // fast path: plain UTF-8 without language specific rules
        if ($encoding === 'UTF-8' && $lang === null) {
            return $lower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        return $lower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
11016
11017
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences, before the case is changed
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($encoding === 'UTF-8' && $lang === null) {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] !== true) {
                /**
                 * @psalm-suppress ImpureFunctionCall - it is only a warning
                 */
                \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            } else {
                // lazy-load the list of transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $transliterator_id = $lang . '-Lower';
                if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator
                    $transliterator_id = 'Any-Lower';
                }

                return (string) \transliterator_transliterate($transliterator_id, $str);
            }
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
11096
11097
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length (the
     *                                                   string is passed through fixStrCaseHelper() first)</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences, before the case is changed
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown,
                    // so the warning reports the language (same wording as in "strtolower()")
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator
                    $language_code = 'Any-Upper';
                }

                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11176
11177
    /**
11178
     * Translate characters or replace sub-strings.
11179
     *
11180
     * EXAMPLE:
11181
     * <code>
11182
     * $array = [
11183
     *     'Hello'   => '○●◎',
11184
     *     '中文空白' => 'earth',
11185
     * ];
11186
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
11187
     * </code>
11188
     *
11189
     * @see http://php.net/manual/en/function.strtr.php
11190
     *
11191
     * @param string          $str  <p>The string being translated.</p>
11192
     * @param string|string[] $from <p>The string replacing from.</p>
11193
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
11194
     *
11195
     * @psalm-pure
11196
     *
11197
     * @return string
11198
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
11199
     *                to the corresponding character in "to".</p>
11200
     */
11201
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // identical search and replacement: nothing can change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // character-wise translation: plain strings are split via self::str_split()
            $from_list = \is_array($from) ? $from : self::str_split($from);
            $to_list = \is_array($to) ? $to : self::str_split($to);

            // cut the longer list down, so both sides can be paired 1:1
            $count_from = \count($from_list);
            $count_to = \count($to_list);
            if ($count_from > $count_to) {
                $from_list = \array_slice($from_list, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to_list = \array_slice($to_list, 0, $count_from);
            }

            // keep the result in its own variable, so the error message below
            // can still show the real "from" input instead of the failed result
            $pairs = \array_combine($from_list, $to_list);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from_list, true) . ' | to: ' . \print_r($to_list, true) . ')');
            }

            return \strtr($str, $pairs);
        }

        // from here on $to is the empty string
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        // $from is used as a "search => replacement" map
        return \strtr($str, $from);
    }
11243
11244
    /**
11245
     * Return the width of a string.
11246
     *
11247
     * INFO: use UTF8::strlen() for the byte-length
11248
     *
11249
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
11250
     *
11251
     * @param string $str        <p>The input string.</p>
11252
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11253
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11254
     *
11255
     * @psalm-pure
11256
     *
11257
     * @return int
11258
     */
11259
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        // the two encodings that are used as-is do not need to be normalized
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        // preferred way: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        // vanilla php fallback: work on UTF-8 data
        if (!$is_utf8) {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip every character matched by the pattern and count how many were removed
        $wide_count = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // removed characters count twice, the rest once
        return ($wide_count * 2) + (int) self::strlen($remaining);
    }
11303
11304
    /**
11305
     * Get part of a string.
11306
     *
11307
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
11308
     *
11309
     * @see http://php.net/manual/en/function.mb-substr.php
11310
     *
11311
     * @param string   $str        <p>The string being checked.</p>
11312
     * @param int      $offset     <p>The first position used in str.</p>
11313
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
11314
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11315
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11316
     *
11317
     * @psalm-pure
11318
     *
11319
     * @return false|string
11320
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
11321
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
11322
     *                      characters long, <b>FALSE</b> will be returned.
11323
     */
11324
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // nothing to extract
        if ($length === 0 || $str === '') {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        // no offset and no limit: the whole string was requested
        if ($length === null && !$offset) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred way: mbstring (UTF-8 only)
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            return $length === null
                ? \mb_substr($str, $offset)
                : \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return $length === null
                ? \substr($str, $offset)
                : \substr($str, $offset, $length);
        }

        // the string length is only looked up when an offset is used or no length was given
        $str_length = ($offset || $length === null)
            ? self::strlen($str, $encoding)
            : 0;

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // the offset points exactly at the end and no length was given
        // ($length can not be 0 here, that case returned at the top)
        if ($length === null && $offset === $str_length) {
            return '';
        }

        // the offset lies behind the end of the string
        if ($offset !== 0 && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = $str_length;
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $intl_result = \grapheme_substr($str, $offset, $length);
            if ($intl_result !== false) {
                return $intl_result;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $iconv_result = \iconv_substr($str, $offset, $length);
            if ($iconv_result !== false) {
                return $iconv_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php: split into characters, slice, join again
        //

        $chars = self::str_split($str);
        $wanted = \array_slice($chars, $offset, $length);

        return \implode('', $wanted);
    }
11462
11463
    /**
11464
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
11465
     *
11466
     * EXAMPLE: <code>
11467
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
11468
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
11469
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
11470
     * </code>
11471
     *
11472
     * @param string   $str1               <p>The main string being compared.</p>
11473
     * @param string   $str2               <p>The secondary string being compared.</p>
11474
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
11475
     *                                     counting from the end of the string.</p>
11476
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
11477
     *                                     of the length of the str compared to the length of main_str less the
11478
     *                                     offset.</p>
11479
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
11480
     *                                     insensitive.</p>
11481
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
11482
     *
11483
     * @psalm-pure
11484
     *
11485
     * @return int
11486
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
11487
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
11488
     *             <strong>0</strong> if they are equal
11489
     */
11490
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // only cut the strings if an offset and / or a length was requested
        if ($offset !== 0 || $length !== null) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // compare against a part of $str2 that is as long as the cut $str1
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // measure $str1 in the same encoding that was used to cut it,
                // otherwise $str2 can be truncated to the wrong length
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
11524
11525
    /**
11526
     * Count the number of substring occurrences.
11527
     *
11528
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
11529
     *
11530
     * @see http://php.net/manual/en/function.substr-count.php
11531
     *
11532
     * @param string   $haystack   <p>The string to search in.</p>
11533
     * @param string   $needle     <p>The substring to search for.</p>
11534
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
11535
     * @param int|null $length     [optional] <p>
11536
     *                             The maximum length after the specified offset to search for the
11537
     *                             substring. It outputs a warning if the offset plus the length is
11538
     *                             greater than the haystack length.
11539
     *                             </p>
11540
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11541
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11542
     *
11543
     * @psalm-pure
11544
     *
11545
     * @return false|int
11546
     *                   <p>This functions returns an integer or false if there isn't a string.</p>
11547
     */
11548
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // an empty needle can not be counted
        if ($needle === '') {
            return false;
        }

        // nothing to search in (the result is the same for every PHP version)
        if ($haystack === '') {
            return 0;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // clean both inputs, so that they are compared on the same basis
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window first
        if ($offset !== 0 || $length > 0) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }

                $length = $haystack_length;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via regex: every match is one occurrence
        $pattern = '/' . \preg_quote($needle, '/') . '/us';
        \preg_match_all($pattern, $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
11622
11623
    /**
11624
     * Count the number of substring occurrences.
11625
     *
11626
     * @param string   $haystack <p>
11627
     *                           The string being checked.
11628
     *                           </p>
11629
     * @param string   $needle   <p>
11630
     *                           The string being found.
11631
     *                           </p>
11632
     * @param int      $offset   [optional] <p>
11633
     *                           The offset where to start counting
11634
     *                           </p>
11635
     * @param int|null $length   [optional] <p>
11636
     *                           The maximum length after the specified offset to search for the
11637
     *                           substring. It outputs a warning if the offset plus the length is
11638
     *                           greater than the haystack length.
11639
     *                           </p>
11640
     *
11641
     * @psalm-pure
11642
     *
11643
     * @return false|int
11644
     *                   <p>The number of times the
11645
     *                   needle substring occurs in the
11646
     *                   haystack string.</p>
11647
     */
11648
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // with the overload active the haystack is cut by hand first
            if ($offset || $length !== null) {
                if ($length === null) {
                    $haystack_length = self::strlen($haystack);
                    if ($haystack_length === false) {
                        return false;
                    }

                    $length = $haystack_length;
                }

                $is_empty_window = $length !== 0
                                   &&
                                   $offset !== 0
                                   &&
                                   ($length + $offset) <= 0;

                // output from "substr_count()" have changed in PHP 7.1
                if ($is_empty_window && \PHP_VERSION_ID < 71000) {
                    return false;
                }

                /** @var false|string $haystack_part - needed for PhpStan (stubs error) */
                $haystack_part = \substr($haystack, $offset, $length);
                $haystack = $haystack_part === false ? '' : (string) $haystack_part;
            }

            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        // no overload: the native function takes offset and length itself
        return $length === null
            ? \substr_count($haystack, $needle, $offset)
            : \substr_count($haystack, $needle, $offset, $length);
    }
11704
11705
    /**
11706
     * Returns the number of occurrences of $substring in the given string.
11707
     * By default, the comparison is case-sensitive, but can be made insensitive
11708
     * by setting $case_sensitive to false.
11709
     *
11710
     * @param string $str            <p>The input string.</p>
11711
     * @param string $substring      <p>The substring to search for.</p>
11712
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
11713
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
11714
     *
11715
     * @psalm-pure
11716
     *
11717
     * @return int
11718
     */
11719
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ($case_sensitive) {
                return (int) \mb_substr_count($str, $substring, $encoding);
            }

            // case-insensitive: count on the case-folded versions of both strings
            $folded_str = self::strtocasefold($str, true, false, $encoding, null, false);
            $folded_substring = self::strtocasefold($substring, true, false, $encoding, null, false);

            return (int) \mb_substr_count($folded_str, $folded_substring, $encoding);
        }

        // UTF-8: use the "mb_" functions with their default encoding
        if ($case_sensitive) {
            return (int) \mb_substr_count($str, $substring);
        }

        // case-insensitive: count on the upper-cased versions of both strings
        $upper_str = \mb_strtoupper($str);
        $upper_substring = \mb_strtoupper($substring);

        return (int) \mb_substr_count($upper_str, $upper_substring);
    }
11752
11753
    /**
11754
     * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
11755
     *
11756
     * EXAMPLE: <code>
11757
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
11758
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
11759
     * </code>
11760
     *
11761
     * @param string $haystack <p>The string to search in.</p>
11762
     * @param string $needle   <p>The substring to search for.</p>
11763
     *
11764
     * @psalm-pure
11765
     *
11766
     * @return string
11767
     *                <p>Return the sub-string.</p>
11768
     */
11769
    public static function substr_ileft(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // cut the prefix off if the haystack starts with it (checked via self::str_istarts_with())
        if ($needle !== '' && self::str_istarts_with($haystack, $needle)) {
            $prefix_length = (int) self::strlen($needle);

            return (string) \mb_substr($haystack, $prefix_length);
        }

        // empty needle or no match: the haystack stays untouched
        return $haystack;
    }
11785
11786
    /**
11787
     * Get part of a string process in bytes.
11788
     *
11789
     * @param string   $str    <p>The string being checked.</p>
11790
     * @param int      $offset <p>The first position used in str.</p>
11791
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
11792
     *
11793
     * @psalm-pure
11794
     *
11795
     * @return false|string
11796
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
11797
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
11798
     *                      characters long, <b>FALSE</b> will be returned.
11799
     */
11800
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to extract
        if ($length === 0 || $str === '') {
            return '';
        }

        // no offset and no limit: the whole string was requested
        if ($length === null && !$offset) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no length given: use a huge limit instead
        if ($length === null) {
            $length = 2147483647;
        }

        return \substr($str, $offset, $length);
    }
11819
11820
    /**
11821
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
11822
     *
11823
     * EXAMPLE: <code>
11824
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
11825
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
11826
     * </code>
11827
     *
11828
     * @param string $haystack <p>The string to search in.</p>
11829
     * @param string $needle   <p>The substring to search for.</p>
11830
     *
11831
     * @psalm-pure
11832
     *
11833
     * @return string
11834
     *                <p>Return the sub-string.</p>
11835
     */
11836
    public static function substr_iright(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // cut the suffix off if the haystack ends with it (checked via self::str_iends_with())
        if ($needle !== '' && self::str_iends_with($haystack, $needle)) {
            $haystack_length = (int) self::strlen($haystack);
            $suffix_length = (int) self::strlen($needle);

            return (string) \mb_substr($haystack, 0, $haystack_length - $suffix_length);
        }

        // empty needle or no match: the haystack stays untouched
        return $haystack;
    }
11852
11853
    /**
11854
     * Removes a prefix ($needle) from the beginning of the string ($haystack).
11855
     *
11856
     * EXAMPLE: <code>
11857
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
11858
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
11859
     * </code>
11860
     *
11861
     * @param string $haystack <p>The string to search in.</p>
11862
     * @param string $needle   <p>The substring to search for.</p>
11863
     *
11864
     * @psalm-pure
11865
     *
11866
     * @return string
11867
     *                <p>Return the sub-string.</p>
11868
     */
11869
    public static function substr_left(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // cut the prefix off if the haystack starts with it (checked via self::str_starts_with())
        if ($needle !== '' && self::str_starts_with($haystack, $needle)) {
            $prefix_length = (int) self::strlen($needle);

            return (string) \mb_substr($haystack, $prefix_length);
        }

        // empty needle or no match: the haystack stays untouched
        return $haystack;
    }
11885
11886
    /**
11887
     * Replace text within a portion of a string.
11888
     *
11889
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
11890
     *
11891
     * source: https://gist.github.com/stemar/8287074
11892
     *
11893
     * @param string|string[] $str         <p>The input string or an array of stings.</p>
11894
     * @param string|string[] $replacement <p>The replacement string or an array of stings.</p>
11895
     * @param int|int[]       $offset      <p>
11896
     *                                     If start is positive, the replacing will begin at the start'th offset
11897
     *                                     into string.
11898
     *                                     <br><br>
11899
     *                                     If start is negative, the replacing will begin at the start'th character
11900
     *                                     from the end of string.
11901
     *                                     </p>
11902
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
11903
     *                                     portion of string which is to be replaced. If it is negative, it
11904
     *                                     represents the number of characters from the end of string at which to
11905
     *                                     stop replacing. If it is not given, then it will default to strlen(
11906
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
11907
     *                                     length is zero then this function will have the effect of inserting
11908
     *                                     replacement into string at the given start offset.</p>
11909
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
11910
     *
11911
     * @psalm-pure
11912
     *
11913
     * @return string|string[]
11914
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
11915
     *
11916
     * @template TSubstrReplace
11917
     * @phpstan-param TSubstrReplace $str
11918
     * @phpstan-return TSubstrReplace
11919
     */
11920
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        //
        // array input: normalise every argument to an array with (at most) one entry
        // per input string and process the strings one by one
        //

        if (\is_array($str)) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries are replaced by 0
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: "null" means length 0 for every string (= insert the replacement),
            // non-integer entries are replaced by the number of input strings
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call (the "$encoding" argument is not forwarded, the default is used)
            /** @phpstan-ignore-next-line - phpstan currently can't handle recursive calls */
            return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
        }

        //
        // single string input
        //

        if (\is_array($replacement)) {
            // use the first element regardless of its key: reading index 0 directly
            // triggers an "undefined offset" warning for arrays without that key
            $replacement = $replacement !== [] ? \reset($replacement) : '';
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end of the string, no length means "until the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace beyond the end of the string
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php: split both strings into characters and splice the arrays
        //

        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12044
12045
    /**
12046
     * Removes a suffix ($needle) from the end of the string ($haystack).
12047
     *
12048
     * EXAMPLE: <code>
12049
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
12050
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
12051
     * </code>
12052
     *
12053
     * @param string $haystack <p>The string to search in.</p>
12054
     * @param string $needle   <p>The substring to search for.</p>
12055
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
12056
     *
12057
     * @psalm-pure
12058
     *
12059
     * @return string
12060
     *                <p>Return the sub-string.</p>
12061
     */
12062
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        // the suffix test is a plain byte comparison and therefore independent of the encoding
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        // cut the needle off, counted in characters of the requested encoding
        if ($encoding === 'UTF-8') {
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
12094
12095
    /**
12096
     * Returns a case swapped version of the string.
12097
     *
12098
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
12099
     *
12100
     * @param string $str        <p>The input string.</p>
12101
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12102
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12103
     *
12104
     * @psalm-pure
12105
     *
12106
     * @return string
12107
     *                <p>Each character's case swapped.</p>
12108
     */
12109
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the case conversion
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            if ($chars === false) {
                // invalid UTF-8 can't be split into characters: keep the legacy byte-wise result
                return (string) (\mb_strtolower($str) ^ \mb_strtoupper($str) ^ $str);
            }

            // The former "lower ^ upper ^ str" XOR works on bytes and is only correct while all
            // three strings share the same byte layout. Case mappings that change the byte length
            // (e.g. "ı" -> "I") truncated or garbled the result, so the swap is done per character.
            $lower_chars = \preg_split('//u', \mb_strtolower($str, 'UTF-8'), -1, \PREG_SPLIT_NO_EMPTY);
            $upper_chars = \preg_split('//u', \mb_strtoupper($str, 'UTF-8'), -1, \PREG_SPLIT_NO_EMPTY);

            // Prefer the whole-string conversions (they can take the neighbouring characters into
            // account) as long as they map 1:1 onto the input characters; otherwise (a mapping
            // expanded to several characters, e.g. "ß" -> "SS") convert each character on its own.
            $char_count = \count($chars);
            $is_aligned = \is_array($lower_chars)
                          &&
                          \is_array($upper_chars)
                          &&
                          \count($lower_chars) === $char_count
                          &&
                          \count($upper_chars) === $char_count;

            $swapped = '';
            foreach ($chars as $index => $char) {
                if ($is_aligned) {
                    $lower = $lower_chars[$index];
                    $upper = $upper_chars[$index];
                } else {
                    $lower = \mb_strtolower($char, 'UTF-8');
                    $upper = \mb_strtoupper($char, 'UTF-8');
                }

                // a char that changes when lower-cased is upper-case -> take the lower-case form,
                // everything else gets its upper-case form (a no-op for uncased chars)
                $swapped .= $lower !== $char ? $lower : $upper;
            }

            return $swapped;
        }

        // NOTE(review): other encodings still use the byte-wise XOR, which assumes that the
        // lower- and upper-case variants have the same byte layout as the input.
        return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
    }
12127
12128
    /**
12129
     * Checks whether symfony-polyfills are used.
12130
     *
12131
     * @psalm-pure
12132
     *
12133
     * @return bool
12134
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
12135
     *
12136
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
12137
     */
12138
    public static function symfony_polyfill_used(): bool
    {
        // a function that exists although its extension is not loaded must come from a polyfill
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12155
12156
    /**
12157
     * @param string $str
12158
     * @param int    $tab_length
12159
     *
12160
     * @psalm-pure
12161
     *
12162
     * @return string
12163
     */
12164
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // every tab character is replaced by "$tab_length" space characters
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12176
12177
    /**
12178
     * Converts the first character of each word in the string to uppercase
12179
     * and all other chars to lowercase.
12180
     *
12181
     * @param string      $str                           <p>The input string.</p>
12182
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
12183
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
12184
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
12185
     *                                                   tr</p>
12186
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
12187
     *                                                   -> ß</p>
12188
     *
12189
     * @psalm-pure
12190
     *
12191
     * @return string
12192
     *                <p>A string with all characters of $str being title-cased.</p>
12193
     */
12194
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // remove non UTF-8 chars before the case conversion
            $str = self::clean($str);
        }

        // language specific rules or length preservation are delegated to "str_titleize()"
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        // plain case: "mb_convert_case()" does the whole job
        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
12231
12232
    /**
12233
     * Convert a string into ASCII.
12234
     *
12235
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
12236
     *
12237
     * @param string $str     <p>The input string.</p>
12238
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
12239
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
12240
     *                        performance</p>
12241
     *
12242
     * @psalm-pure
12243
     *
12244
     * @return string
12245
     */
12246
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        // thin wrapper: the transliteration itself is done by the ASCII class
        $ascii = ASCII::to_transliterate($str, $unknown, $strict);

        return $ascii;
    }
12253
12254
    /**
12255
     * @param bool|float|int|string $str
12256
     *
12257
     * @psalm-pure
12258
     *
12259
     * @return bool
12260
     */
12261
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // keywords, info: http://php.net/manual/en/filter.filters.validate.php
        $keywords = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // all keys are lower-case, so one case-insensitive lookup also covers the exact matches
        $keyword = \strtolower($value);
        if (isset($keywords[$keyword])) {
            return $keywords[$keyword];
        }

        // other numeric input: only values greater than zero are "true"
        if (\is_numeric($value)) {
            return ((float) $value) > 0;
        }

        // anything else: php's own string to bool conversion of the trimmed input
        return (bool) \trim($value);
    }
12297
12298
    /**
12299
     * Convert given string to safe filename (and keep string case).
12300
     *
12301
     * @param string $str
12302
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
12303
     *                                  simply replaced with hyphen.
12304
     * @param string $fallback_char
12305
     *
12306
     * @psalm-pure
12307
     *
12308
     * @return string
12309
     */
12310
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        // thin wrapper: the conversion itself is done by the ASCII class
        return ASCII::to_filename($str, $use_transliterate, $fallback_char);
    }
12321
12322
    /**
12323
     * Convert a string into "ISO-8859"-encoding (Latin-1).
12324
     *
12325
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
12326
     *
12327
     * @param string|string[] $str
12328
     *
12329
     * @psalm-pure
12330
     *
12331
     * @return string|string[]
12332
     *
12333
     * @template TToIso8859
12334
     * @phpstan-param TToIso8859 $str
12335
     * @phpstan-return TToIso8859
12336
     */
12337
    public static function to_iso8859($str)
    {
        // arrays are converted element by element (recursively), the keys are kept
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
12354
12355
    /**
12356
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
12357
     *
12358
     * <ul>
12359
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
12360
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
12361
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
12362
     * case.</li>
12363
     * </ul>
12364
     *
12365
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
12366
     *
12367
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
12368
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
12369
     *
12370
     * @psalm-pure
12371
     *
12372
     * @return string|string[]
12373
     *                         <p>The UTF-8 encoded string</p>
12374
     *
12375
     * @template TToUtf8
12376
     * @phpstan-param TToUtf8 $str
12377
     * @phpstan-return TToUtf8
12378
     */
12379
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        // single string: convert it directly
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $converted */
            $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $converted;
        }

        // array: convert every element, the keys are kept
        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        /** @phpstan-var TToUtf8 $str */
        return $str;
    }
12395
12396
    /**
12397
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
12398
     *
12399
     * <ul>
12400
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
12401
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
12402
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
12403
     * case.</li>
12404
     * </ul>
12405
     *
12406
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
12407
     *
12408
     * @param string $str                        <p>Any string.</p>
12409
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
12410
     *
12411
     * @psalm-pure
12412
     *
12413
     * @return string
12414
     *                <p>The UTF-8 encoded string</p>
12415
     */
12416
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $result = '';
        $pos = 0;

        // walk over the raw bytes: well-formed looking UTF-8 sequences are copied,
        // every other byte >= 0x80 is handed to the convert-helper on its own
        while ($pos < $byte_count) {
            $lead = $str[$pos];
            $lead_ord = \ord($lead);

            if ($lead_ord < 0x80) { // it doesn't need conversion
                $result .= $lead;
                ++$pos;

                continue;
            }

            // sequence length announced by the lead byte
            if ($lead_ord >= 0xC0 && $lead_ord <= 0xDF) { // looks like 2 bytes UTF8
                $sequence_length = 2;
            } elseif ($lead_ord >= 0xE0 && $lead_ord <= 0xEF) { // looks like 3 bytes UTF8
                $sequence_length = 3;
            } elseif ($lead_ord >= 0xF0 && $lead_ord <= 0xF7) { // looks like 4 bytes UTF8
                $sequence_length = 4;
            } else { // 0x80-0xBF or 0xF8-0xFF: doesn't look like UTF8, but should be converted
                $sequence_length = 0;
            }

            // every following byte of the sequence must exist and be in the range 0x80-0xBF
            $looks_like_utf8 = $sequence_length > 0;
            for ($k = 1; $looks_like_utf8 && $k < $sequence_length; ++$k) {
                $next_ord = $pos + $k < $byte_count ? \ord($str[$pos + $k]) : 0x00;
                $looks_like_utf8 = $next_ord >= 0x80 && $next_ord <= 0xBF;
            }

            if ($looks_like_utf8) { // yeah, almost sure it's UTF8 already
                $result .= \substr($str, $pos, $sequence_length);
                $pos += $sequence_length;
            } else { // not valid UTF8 - convert it
                $result .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair, see: http://unicode.org/faq/utf_bom.html#utf16-4
                    $code_point = ((int) \hexdec($matches[1]) << 10)
                                  + (int) \hexdec($matches[2])
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6)) . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "null" signals a regex error
        if ($result === null) {
            return '';
        }

        // optionally decode html-entities
        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
12527
12528
    /**
12529
     * Returns the given string as an integer, or null if the string isn't numeric.
12530
     *
12531
     * @param string $str
12532
     *
12533
     * @psalm-pure
12534
     *
12535
     * @return int|null
12536
     *                  <p>null if the string isn't numeric</p>
12537
     */
12538
    public static function to_int(string $str)
    {
        // only numeric strings are cast, everything else yields "null"
        return \is_numeric($str) ? (int) $str : null;
    }
12546
12547
    /**
12548
     * Returns the given input as string, or null if the input isn't int|float|string
12549
     * and do not implement the "__toString()" method.
12550
     *
12551
     * @param float|int|object|string|null $input
12552
     *
12553
     * @psalm-pure
12554
     *
12555
     * @return string|null
12556
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
12557
     */
12558
    public static function to_string($input)
    {
        // scalars with a natural string representation ("null" and booleans are not part of it)
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects are only accepted, if they know how to convert themselves
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        return null;
    }
12586
12587
    /**
12588
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
12589
     *
12590
     * INFO: This is slower than "trim()"
12591
     *
12592
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
12593
     * but the check for ASCII (7-Bit) costs more time than we can save here.
12594
     *
12595
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
12596
     *
12597
     * @param string      $str   <p>The string to be trimmed</p>
12598
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
12599
     *
12600
     * @psalm-pure
12601
     *
12602
     * @return string
12603
     *                <p>The trimmed string.</p>
12604
     */
12605
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty char list strips nothing (like the native "trim()"). Without this guard
        // the patterns below would contain the invalid character class "[]", the regex
        // call would fail and an empty string would be returned instead of the input.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // custom char list: strip runs of these chars from both ends
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                // default: strip whitespace from both ends
                $pattern = '^[\\s]+|[\\s]+$';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: same patterns, applied via "regex_replace()"
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
12632
12633
    /**
12634
     * Makes string's first char uppercase.
12635
     *
12636
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
12637
     *
12638
     * @param string      $str                           <p>The input string.</p>
12639
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
12640
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
12641
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
12642
     *                                                   tr</p>
12643
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
12644
     *                                                   -> ß</p>
12645
     *
12646
     * @psalm-pure
12647
     *
12648
     * @return string
12649
     *                <p>The resulting string with its first char in uppercase.</p>
12650
     */
12651
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the string is split
            $str = self::clean($str);
        }

        // language specific rules or length preservation need the library's own "strtoupper()"
        $needs_custom_strtoupper = $lang !== null || $try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            if ($needs_custom_strtoupper) {
                $first_char = self::strtoupper(
                    $first_char,
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            } else {
                $first_char = \mb_strtoupper($first_char);
            }

            return $first_char . $remainder;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $remainder = (string) self::substr($str, 1, null, $encoding);

        if ($needs_custom_strtoupper) {
            $first_char = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        } else {
            $first_char = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        }

        return $first_char . $remainder;
    }
12709
12710
    /**
12711
     * Uppercase for all words in the string.
12712
     *
12713
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
12714
     *
12715
     * @param string   $str        <p>The input string.</p>
12716
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
12717
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
12718
     *                             word.</p>
12719
     * @param string   $encoding   [optional] <p>Set the charset.</p>
12720
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12721
     *
12722
     * @psalm-pure
12723
     *
12724
     * @return string
12725
     */
12726
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: a loose check would treat the valid input "0" as empty
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            // remove non UTF-8 chars before the string is split into words
            $str = self::clean($str);
        }

        // the native "ucwords()" is only an option, if neither extra word-chars nor exceptions
        // are given (compared strictly, so that e.g. a char list of "0" is not ignored)
        $use_php_default_functions = $char_list === '' && \implode('', $exceptions) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty chunks only - a chunk like "0" is regular content and must be kept
            if ((string) $word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                // excluded word: keep it untouched
                $words_str .= $word;
            }
        }

        return $words_str;
    }
12778
12779
    /**
     * Decode URL-encoding and HTML entities (optionally repeatedly) and repair simple UTF-8 damage.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible (until a pass no longer changes the string).</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $decoded = self::urldecode_unicode_helper($str);

        // The first pass always runs; further passes happen only in multi-decode mode
        // and only while the previous pass still changed something.
        do {
            $previous = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($previous),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $previous !== $decoded);

        return self::fix_simple_utf8($decoded);
    }
12838
12839
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Every character that does not fit into one byte (code point >= 256, i.e. all 3- and 4-byte
     * sequences and part of the 2-byte ones) is replaced by "?".
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>
     *                                If true, the original string is returned unchanged whenever the decoded
     *                                result is not shorter than the input (both measured with self::strlen()).
     *                                </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';

        // The string is rewritten in place: $i is the read offset, $j the write offset ($j <= $i always,
        // so bytes that still have to be read are never overwritten).
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the current byte tells how long the sequence is
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // lead byte of a 2-byte sequence
                    if ($i + 1 >= $len) {
                        // truncated sequence at the end of the input: there is no continuation byte to read
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                case "\xF0":
                    // lead byte of a 4-byte sequence: skip one byte more than the 3-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: never representable in one byte
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // single byte, copied as is
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12911
12912
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * The conversion is done byte-wise without the native utf8_encode() function, because that
     * function is deprecated as of PHP 8.2. The produced bytes are the same: bytes below 0x80 are
     * copied, every byte 0x80-0xFF becomes the two-byte UTF-8 sequence of the code point U+0080-U+00FF.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the conversion failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // No "/u" modifier: the pattern has to match raw bytes, not UTF-8 characters.
        $encoded = \preg_replace_callback(
            '/[\x80-\xFF]/',
            static function (array $match): string {
                $byte = \ord($match[0]);

                // 110000xx 10xxxxxx => lead byte is 0xC2 or 0xC3, followed by the low six bits
                return \chr(0xC0 | ($byte >> 6)) . \chr(0x80 | ($byte & 0x3F));
            },
            $str
        );

        // preg_replace_callback() returns null on a PCRE error
        if ($encoded === null) {
            return '';
        }

        return $encoded;
    }
12938
12939
    /**
     * Returns the table of UTF-8 whitespace characters, keyed by whitespace type.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The known whitespace characters as values, the type of whitespace (as defined in
     *                  the URL above) as keys.</p>
     */
    public static function whitespace_table(): array
    {
        return self::$WHITESPACE_TABLE;
    }
12954
12955
    /**
     * Limit the number of words in a string.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The maximum number of words to keep; values below 1 yield an empty string.</p>
     * @param string $str_add_on <p>Appended to the result when the string was actually shortened.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace followed by up to $limit "word + trailing whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        // no match at all => hand the input back untouched
        if (!isset($found[0])) {
            return $str;
        }

        $head = $found[0];

        // the match already covers the whole string => nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
12989
12990
    /**
     * Wraps a string to a given number of characters.
     *
     * The wrapping itself is delegated to the native wordwrap(), but it is run on a one-byte-per-character
     * mask of the input, so that $width counts characters (as produced by self::str_split()) and not bytes.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string if $str or
     *                $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);

        // $chars holds the real characters (plus the already existing breaks),
        // $mask mirrors it: "#" = break, " " = space, "?" = any other character.
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $wrapped = '';
        $char_index = 0;
        $break_pos = -1;
        $mask_index = -1;
        $mask = \wordwrap($mask, $width, '#', $cut);

        $max = \mb_strlen($mask);
        /** @noinspection PhpAssignmentInConditionInspection - is ok here */
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy (and consume) the characters up to the next break position of the mask
            for (++$mask_index; $mask_index < $break_pos; ++$mask_index) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $max) {
                    break 2;
                }
            }

            // the character sitting on the break position is dropped if it is a break or a space
            if (
                $chars[$char_index] === $break
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $max) {
                break;
            }
        }

        // whatever was not consumed above belongs to the last line
        return $wrapped . \implode('', $chars);
    }
13082
13083
    /**
     * Line-wrap the string after $width characters, but split the string by "$delimiter" first,
     * so that every line is wrapped on its own.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     The string to split (and re-join) the input by. With null the input
     *                                     is split on "\r\n", "\r" or "\n" and re-joined with "\n".
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        // both split functions may report a failure with false
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        return \implode($delimiter ?? "\n", $wrapped_lines) . ($add_final_break ? $break : '');
    }
13136
13137
    /**
     * Returns the list of Unicode White Space characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The White Space characters as values, their numeric code points as keys.</p>
     */
    public static function ws(): array
    {
        return self::$WHITESPACE;
    }
13149
13150
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * With PCRE UTF-8 support the check is delegated to preg_match() with the "/u" modifier, otherwise
     * the bytes are validated one by one with a small state machine.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary looking input that is detected as UTF-16 / UTF-32 is rejected
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        $pending = 0; // number of continuation octets still expected for the current character
        $code_point = 0; // code point assembled so far
        $sequence_length = 1; // total number of octets of the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            $octet = self::$ORD[$str[$i]];

            if ($pending === 0) {
                // Start of a character: either US-ASCII or the first octet of a multi-octet sequence.
                if (($octet & 0x80) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;
                } elseif (($octet & 0xE0) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $code_point = ($octet & 0x1F) << 6;
                    $pending = 1;
                    $sequence_length = 2;
                } elseif (($octet & 0xF0) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $code_point = ($octet & 0x0F) << 12;
                    $pending = 2;
                    $sequence_length = 3;
                } elseif (($octet & 0xF8) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $code_point = ($octet & 0x07) << 18;
                    $pending = 3;
                    $sequence_length = 4;
                } elseif (($octet & 0xFC) === 0xF8) {
                    // First octet of 5 octet sequence.
                    //
                    // This is illegal because the encoded codepoint must be either
                    // (a) not the shortest form or
                    // (b) outside the Unicode range of 0-0x10FFFF.
                    // Rather than trying to resynchronize, we carry on until the end
                    // of the sequence and let the later error handling code catch it.
                    $code_point = ($octet & 0x03) << 24;
                    $pending = 4;
                    $sequence_length = 5;
                } elseif (($octet & 0xFE) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $code_point = ($octet & 0x01) << 30;
                    $pending = 5;
                    $sequence_length = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            // Inside a multi-octet sequence only a continuation octet (10xxxxxx) is legal.
            if (($octet & 0xC0) !== 0x80) {
                // Incomplete multi-octet sequence.
                return false;
            }

            // Legal continuation: merge its six payload bits at the right position.
            $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);

            if (--$pending !== 0) {
                continue;
            }

            // End of the multi-octet sequence: $code_point now holds the final Unicode code point.
            // Check for illegal sequences and code points.
            if (
                // From Unicode 3.1, non-shortest form is illegal
                ($sequence_length === 2 && $code_point < 0x0080)
                ||
                ($sequence_length === 3 && $code_point < 0x0800)
                ||
                ($sequence_length === 4 && $code_point < 0x10000)
                ||
                ($sequence_length > 4)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($code_point & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                ($code_point > 0x10FFFF)
            ) {
                return false;
            }

            // reset the state for the next character
            $pending = 0;
            $code_point = 0;
            $sequence_length = 1;
        }

        // a sequence that is still open at the end of the string is invalid
        return $pending === 0;
    }
13304
13305
    /**
     * Replace case variants via the case-fold tables (no native case conversion is done here).
     *
     * First the 'upper' / 'lower' lists of self::$COMMON_CASE_FOLD are applied, then - if requested -
     * the table loaded from the "caseFolding_full" data file.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        /**
         * Lazily loaded, shared between all calls.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;

        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $use_lowercase
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // lowercase: entries of list 0 are replaced by list 1; uppercase: the other way round
        $search_index = $use_lowercase ? 0 : 1;

        return \str_replace(
            $FULL_CASE_FOLD[$search_index],
            $FULL_CASE_FOLD[1 - $search_index],
            $str
        );
    }
13358
13359
    /**
     * Load a data table from the "data" directory next to this file.
     *
     * @param string $file <p>The name of the data file, without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *               <p>Whatever the included file returns.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $data_file = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $data_file;
    }
13377
13378
    /**
     * Build the emoji lookup caches once.
     *
     * Loads the "emoji" data file if needed, sorts it by key length (longest key first) and fills the
     * key cache, the value cache and the list of reversible placeholders (one per key, derived from
     * the crc32 of the key).
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function initEmojiData()
    {
        // already initialized => nothing to do
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                // descending by byte length
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
13415
13416
    /**
     * Checks whether mbstring function overloading for string functions is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>true if the constant MB_OVERLOAD_STRING exists and its bit is set in the
     *              "mbstring.func_overload" INI value, false otherwise.</p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant there is nothing to compare the INI value with
        /** @noinspection PhpComposerExtensionStubsInspection */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection DeprecatedIniOptionsInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($func_overload & \MB_OVERLOAD_STRING);
    }
13436
13437
    /**
     * Filter a list of strings and re-index the result.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop entries that are empty after trim().</p>
     * @param int|null $remove_short_values <p>
     *                                      Drop entries whose mb_strlen() is less than or equal to this
     *                                      value; null disables the length check.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return array
     *               <p>The remaining entries in their original order, with consecutive integer keys.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        $keep = static function ($value) use ($remove_empty_values, $remove_short_values): bool {
            // the length check comes first, exactly like the emptiness check comes second
            if ($remove_short_values !== null && \mb_strlen($value) <= $remove_short_values) {
                return false;
            }

            return !($remove_empty_values && \trim($value) === '');
        };

        // array_filter() preserves the keys, the callers get a plain list
        return \array_values(\array_filter($strings, $keep));
    }
13478
13479
    /**
     * Build a regular-expression fragment that matches any one of the characters of $s.
     *
     * Characters are collected into one character class "[...]" (a "-" is moved to the front of the
     * class, characters of up to two bytes are escaped with preg_quote()). Elements returned by
     * self::str_split() that are longer than one character are added as alternatives instead, which
     * turns the result into a non-capturing group "(?:[...]|...)". Results are cached per ($s, $class).
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Raw character-class content that is put in front of the characters of $s.</p>
     *
     * @return string
     *
     * @psalm-pure
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 is the content of the character class, all further entries are alternatives
        $class_array = [$class];

        // iterate by value into a dedicated variable: the result of a function call cannot be a
        // reference target and the parameter $s must not be overwritten by the loop
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a leading "-" is always literal inside a character class
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        // compared against '' so that a class consisting of "0" gets its brackets as well
        if ($class_array[0] !== '') {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
13534
13535
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter and upper-cases the first character of every
     * part via self::ucfirst(), except for:
     * - parts that are exactly one of the known lowercase name particles ("de", "van", "von", ...),
     * - parts that begin with one of the known prefixes ("mc", "d'", "al-", ...),
     * - and, only when the delimiter is "-", parts that begin with one of the name particles.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the parts; an empty delimiter yields an empty string.</p>
     * @param string $encoding  <p>Passed through to self::ucfirst().</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() cannot split on an empty delimiter: it returned false (plus a warning)
        // before PHP 8 and throws a ValueError since, so bail out with the old result.
        if ($delimiter === '') {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that protect a part from being capitalized; this list depends
        // only on the delimiter, so it is built once instead of once per part.
        $protected_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $protected_beginnings = \array_merge($special_cases['names'], $protected_beginnings);
        }

        $name_helper_array = \explode($delimiter, $names);

        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $is_protected = false;
            foreach ($protected_beginnings as $beginning) {
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    $is_protected = true;

                    break;
                }
            }

            if ($is_protected) {
                continue;
            }

            $name_helper_array[$index] = self::ucfirst($name, $encoding);
        }

        return \implode($delimiter, $name_helper_array);
    }
13635
13636
    /**
     * Fold a string for collation matching by removing its combining marks.
     *
     * The input is decomposed with Unicode normalization form D and every run of
     * nonspacing marks (\p{Mn}) is removed afterwards. The letter case is kept.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The folded string, an empty string if the normalization failed,
     *                     or whatever preg_replace() hands back (null on a PCRE error).</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        return $decomposed === false
            ? ''
            : \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
13660
13661
    /**
     * Convert one input value into a UTF-8 sequence.
     *
     * The value is first looked up in the "ord" table; if the result has an entry
     * in the Windows-1252 special-case table that entry is returned, otherwise a
     * two-byte sequence (lead byte 0xC0 | (ord / 64), trail byte 0x80 | low six
     * bits of the input) is assembled.
     *
     * NOTE(review): the bitwise string operations below presumably expect $input
     * to be a single byte that is a key of the "ord" table - confirm against the callers.
     *
     * @param int|string $input
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // lazy-load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            return (string) self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Truncate explicitly: a float with a fractional part used as an array key
        // is an implicit, lossy float-to-int conversion (deprecated since PHP 8.1).
        /** @noinspection OffsetOperationsInspection */
        $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
        // bitwise string operators on purpose: keep the low six bits, set the continuation marker
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
13699
13700
    /**
     * Rewrite non-standard "%uXXXX" escapes as hexadecimal HTML entities ("&#xXXXX;").
     *
     * Only escapes with three or four hex digits are rewritten; a string without
     * such an escape is returned unchanged.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        // cheap substring check first, so that most inputs never reach the regex engine
        if (\strpos($str, '%u') === false) {
            return $str;
        }

        $unicode_escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        if (!\preg_match($unicode_escape_rx, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape_rx, '&#x\\1;', $str);
    }
13722
}
13723