Passed
Push — master ( 86a6b8...2617d0 )
by Lars
03:47 queued 11s
created

UTF8   F

Complexity

Total Complexity 1743

Size/Duplication

Total Lines 13654
Duplicated Lines 0 %

Test Coverage

Coverage 81.08%

Importance

Changes 108
Bugs 51 Features 6
Metric Value
eloc 4221
c 108
b 51
f 6
dl 0
loc 13654
ccs 3105
cts 3829
cp 0.8108
rs 0.8
wmc 1743

272 Methods

Rating   Name   Duplication   Size   Complexity  
A getData() 0 6 1
A fixStrCaseHelper() 0 41 5
A str_substr_after_first_separator() 0 28 6
A str_starts_with_any() 0 17 5
A filter_input() 0 16 3
A encode_mimeheader() 0 26 5
A filter_var_array() 0 15 2
F extract_text() 0 175 34
A filter_var() 0 15 2
A filter_input_array() 0 15 3
A first_char() 0 14 4
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
B chr_to_decimal() 0 38 8
A file_has_bom() 0 8 2
A max() 0 14 3
B str_camelize() 0 74 10
A add_bom_to_string() 0 7 2
A parse_str() 0 18 4
A str_contains() 0 15 3
B str_to_lines() 0 28 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A array_change_key_case() 0 23 5
A is_bom() 0 10 3
A is_hexadecimal() 0 7 2
A get_unique_string() 0 21 3
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
A count_chars() 0 11 1
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A ctype_loaded() 0 3 1
A str_replace_beginning() 0 25 6
A has_uppercase() 0 7 2
A remove_left() 0 28 4
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
C utf8_decode() 0 59 13
A ltrim() 0 26 5
A emoji_decode() 0 21 3
A is_utf8() 0 13 4
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 73 15
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
D chr() 0 107 19
A html_escape() 0 6 1
A string() 0 16 4
B str_obfuscate() 0 47 8
D normalize_encoding() 0 147 16
B rxClass() 0 44 8
B get_file_type() 0 60 7
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 170 7
C is_utf16() 0 71 16
C filter() 0 59 14
A normalize_whitespace() 0 11 1
A str_starts_with() 0 16 4
A str_humanize() 0 15 1
A is_html() 0 14 2
A decode_mimeheader() 0 8 3
C substr_count_in_byte() 0 55 15
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 26 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A chunk_split() 0 3 1
A titlecase() 0 35 5
B strtolower() 0 58 10
A urldecode() 0 35 4
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 123 27
A strstr_in_byte() 0 15 4
A emoji_encode() 0 21 3
A str_matches_pattern() 0 3 1
A is_alpha() 0 7 2
C str_titleize() 0 69 12
A str_split_array() 0 17 2
B get_random_string() 0 54 10
A ws() 0 3 1
A str_replace_first() 0 20 2
A fix_utf8() 0 30 4
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A to_boolean() 0 35 5
A css_identifier() 0 55 6
C stristr() 0 79 17
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A css_stripe_media_queries() 0 6 1
A trim() 0 26 5
A clean() 0 47 6
A is_serialized() 0 11 3
A str_upper_camelize() 0 8 1
A is_uppercase() 0 7 2
A substr_compare() 0 33 6
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 12 3
A str_ireplace() 0 31 5
A str_replace_ending() 0 24 6
A string_has_bom() 0 9 3
B strtr() 0 41 11
B str_contains_all() 0 22 9
A is_ascii() 0 3 1
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 71 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A rawurldecode() 0 35 4
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 14 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A is_blank() 0 7 2
A str_replace() 0 18 1
A substr_iright() 0 15 4
D getCharDirection() 0 104 118
A htmlspecialchars() 0 15 3
A replace() 0 11 2
A __construct() 0 2 1
A decimal_to_chr() 0 3 1
A to_iso8859() 0 16 4
A has_whitespace() 0 7 2
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
B between() 0 48 8
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A codepoints() 0 36 5
A substr_right() 0 31 6
D str_split() 0 134 29
A str_ends_with_any() 0 13 4
A chr_map() 0 5 1
A strrpos_in_byte() 0 12 4
A cleanup() 0 24 2
F strrpos() 0 136 31
A remove_right() 0 25 4
A remove_html_breaks() 0 3 1
A showSupport() 0 16 3
A char_at() 0 7 2
A remove_invisible_characters() 0 11 1
A single_chr_html_encode() 0 18 4
A chars() 0 4 1
A str_replace_last() 0 19 2
A str_substr_before_last_separator() 0 31 6
B is_binary() 0 37 9
A intlChar_loaded() 0 3 1
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
A tabs_to_spaces() 0 11 3
B is_url() 0 40 7
B str_truncate() 0 43 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 16 4
A to_ascii() 0 6 1
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 12 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A html_stripe_empty_tags() 0 6 1
A chr_size_list() 0 17 3
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
A json_loaded() 0 3 1
B str_snakeize() 0 57 6
A is_lowercase() 0 7 2
A str_sort() 0 15 3
A to_utf8() 0 15 3
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 51 11
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A is_empty() 0 3 1
B html_encode() 0 54 11
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
F encode() 0 144 37
C is_utf32() 0 71 16
C ord() 0 68 16
B to_string() 0 26 8
A is_alphanumeric() 0 7 2
A strtonatfold() 0 11 2
A json_decode() 0 13 2
C strcspn() 0 48 12
A fix_simple_utf8() 0 32 4
A checkForSupport() 0 46 4
B is_json() 0 26 8
A is_printable() 0 3 1
A int_to_hex() 0 7 2
C str_split_pattern() 0 54 13
D strstr() 0 107 21
A has_lowercase() 0 7 2
A json_encode() 0 9 2
A str_isubstr_first() 0 25 4
A is_base64() 0 17 5
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A hex_to_int() 0 14 3
A hex_to_chr() 0 4 1
A htmlentities() 0 28 3
A str_substr_before_first_separator() 0 32 6
F substr() 0 137 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A getSupportInfo() 0 13 3
A replace_diamond_question_mark() 0 43 5
A chr_to_hex() 0 11 3
D is_utf8_string() 0 133 28
B str_delimit() 0 31 8
A to_utf8_convert_helper() 0 28 5
B strtoupper() 0 58 10
A min() 0 14 3
A is_punctuation() 0 3 1
A collapse_whitespace() 0 7 2
C html_entity_decode() 0 58 13
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 28 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
A access() 0 11 4
F strpos() 0 151 33
A str_shuffle() 0 33 6
A strcmp() 0 11 2
B file_get_contents() 0 56 11
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A callback() 0 3 1
A symfony_polyfill_used() 0 16 5
A binary_to_str() 0 12 3
A bom() 0 3 1
B str_to_words() 0 36 8
A emoji_from_country_code() 0 17 3

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
     * Bom => Byte-Length
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * Every key is a byte sequence that marks a BOM, either in its raw form or in the
     * form it takes after being mis-read as "WINDOWS-1252" and re-encoded as UTF-8.
     * Every value is the length of that key in bytes.
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // Written as escapes on purpose: the literal characters (i-diaeresis, right guillemet,
        // inverted question mark) are easily lost when the file passes through tools.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): the declared length 6 implies two single-byte characters in front of
        // the two 2-byte letters - confirm these are the intended bytes in the real file.
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
31
32
    /**
33
     * Numeric code point => UTF-8 Character
34
     *
35
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
36
     *
37
     * @var array<int, string>
38
     */
39
    private static $WHITESPACE = [
40
        // NULL Byte
41
        0 => "\x0",
42
        // Tab
43
        9 => "\x9",
44
        // New Line
45
        10 => "\xa",
46
        // Vertical Tab
47
        11 => "\xb",
48
        // Carriage Return
49
        13 => "\xd",
50
        // Ordinary Space
51
        32 => "\x20",
52
        // NO-BREAK SPACE
53
        160 => "\xc2\xa0",
54
        // OGHAM SPACE MARK
55
        5760 => "\xe1\x9a\x80",
56
        // MONGOLIAN VOWEL SEPARATOR
57
        6158 => "\xe1\xa0\x8e",
58
        // EN QUAD
59
        8192 => "\xe2\x80\x80",
60
        // EM QUAD
61
        8193 => "\xe2\x80\x81",
62
        // EN SPACE
63
        8194 => "\xe2\x80\x82",
64
        // EM SPACE
65
        8195 => "\xe2\x80\x83",
66
        // THREE-PER-EM SPACE
67
        8196 => "\xe2\x80\x84",
68
        // FOUR-PER-EM SPACE
69
        8197 => "\xe2\x80\x85",
70
        // SIX-PER-EM SPACE
71
        8198 => "\xe2\x80\x86",
72
        // FIGURE SPACE
73
        8199 => "\xe2\x80\x87",
74
        // PUNCTUATION SPACE
75
        8200 => "\xe2\x80\x88",
76
        // THIN SPACE
77
        8201 => "\xe2\x80\x89",
78
        // HAIR SPACE
79
        8202 => "\xe2\x80\x8a",
80
        // LINE SEPARATOR
81
        8232 => "\xe2\x80\xa8",
82
        // PARAGRAPH SEPARATOR
83
        8233 => "\xe2\x80\xa9",
84
        // NARROW NO-BREAK SPACE
85
        8239 => "\xe2\x80\xaf",
86
        // MEDIUM MATHEMATICAL SPACE
87
        8287 => "\xe2\x81\x9f",
88
        // HALFWIDTH HANGUL FILLER
89
        65440 => "\xef\xbe\xa0",
90
        // IDEOGRAPHIC SPACE
91
        12288 => "\xe3\x80\x80",
92
    ];
93
94
    /**
95
     * @var array<string, string>
96
     */
97
    private static $WHITESPACE_TABLE = [
98
        'SPACE'                     => "\x20",
99
        'NO-BREAK SPACE'            => "\xc2\xa0",
100
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
101
        'EN QUAD'                   => "\xe2\x80\x80",
102
        'EM QUAD'                   => "\xe2\x80\x81",
103
        'EN SPACE'                  => "\xe2\x80\x82",
104
        'EM SPACE'                  => "\xe2\x80\x83",
105
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
106
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
107
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
108
        'FIGURE SPACE'              => "\xe2\x80\x87",
109
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
110
        'THIN SPACE'                => "\xe2\x80\x89",
111
        'HAIR SPACE'                => "\xe2\x80\x8a",
112
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
113
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
114
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
115
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
116
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
117
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
118
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
119
    ];
120
121
    /**
122
     * @var array
123
     *
124
     * @phpstan-var array{upper: string[], lower: string[]}
125
     */
126
    private static $COMMON_CASE_FOLD = [
127
        'upper' => [
128
            'µ',
129
            'ſ',
130
            "\xCD\x85",
131
            'ς',
132
            'ẞ',
133
            "\xCF\x90",
134
            "\xCF\x91",
135
            "\xCF\x95",
136
            "\xCF\x96",
137
            "\xCF\xB0",
138
            "\xCF\xB1",
139
            "\xCF\xB5",
140
            "\xE1\xBA\x9B",
141
            "\xE1\xBE\xBE",
142
        ],
143
        'lower' => [
144
            'μ',
145
            's',
146
            'ι',
147
            'σ',
148
            'ß',
149
            'β',
150
            'θ',
151
            'φ',
152
            'π',
153
            'κ',
154
            'ρ',
155
            'ε',
156
            "\xE1\xB9\xA1",
157
            'ι',
158
        ],
159
    ];
160
161
    /**
162
     * @var array
163
     *
164
     * @phpstan-var array<string, mixed>
165
     */
166
    private static $SUPPORT = [];
167
168
    /**
169
     * @var string[]|null
170
     *
171
     * @phpstan-var array<string, string>|null
172
     */
173
    private static $BROKEN_UTF8_FIX;
174
175
    /**
176
     * @var string[]|null
177
     *
178
     * @phpstan-var array<int, string>|null
179
     */
180
    private static $WIN1252_TO_UTF8;
181
182
    /**
183
     * @var string[]|null
184
     *
185
     * @phpstan-var array<int ,string>|null
186
     */
187
    private static $INTL_TRANSLITERATOR_LIST;
188
189
    /**
190
     * @var string[]|null
191
     *
192
     * @phpstan-var array<string>|null
193
     */
194
    private static $ENCODINGS;
195
196
    /**
197
     * @var int[]|null
198
     *
199
     * @phpstan-var array<string ,int>|null
200
     */
201
    private static $ORD;
202
203
    /**
204
     * @var string[]|null
205
     *
206
     * @phpstan-var array<string, string>|null
207
     */
208
    private static $EMOJI;
209
210
    /**
211
     * @var string[]|null
212
     *
213
     * @phpstan-var array<string>|null
214
     */
215
    private static $EMOJI_VALUES_CACHE;
216
217
    /**
218
     * @var string[]|null
219
     *
220
     * @phpstan-var array<string>|null
221
     */
222
    private static $EMOJI_KEYS_CACHE;
223
224
    /**
225
     * @var string[]|null
226
     *
227
     * @phpstan-var array<string>|null
228
     */
229
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
230
231
    /**
232
     * @var string[]|null
233
     *
234
     * @phpstan-var array<int, string>|null
235
     */
236
    private static $CHR;
237
238
    /**
     * Creates an instance without doing any work.
     *
     * The constructor takes no arguments and has an empty body; the methods
     * visible in this class are static and do not need an instance.
     */
    public function __construct()
    {
        // intentionally empty
    }
244
245
    /**
     * Fetch the single character found at a given position, similar to $str[1]
     * but counting characters instead of bytes.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character, or an empty string for empty input or a negative position.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // negative positions are not supported here and empty input has nothing to return
        if ($pos < 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, everything else through the class' own substr()
        return $encoding === 'UTF-8'
            ? (string) \mb_substr($str, $pos, 1)
            : (string) self::substr($str, $pos, 1, $encoding);
    }
271
272
    /**
     * Make sure the string starts with a byte order mark.
     *
     * INFO: A string for which UTF8::string_has_bom() already reports a BOM is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The output string that contains BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
294
295
    /**
     * Changes the case of all keys in an array.
     *
     * The values are copied over unchanged. If two keys become equal after the
     * case change, the later entry overwrites the earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default)</p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>
     *                  <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // any unknown mode silently falls back to lower-casing
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: nothing is written back into $array, so a reference is not needed
        foreach ($array as $key => $value) {
            // PHP turns numeric-looking keys into integers; this file runs with strict_types,
            // so hand the case helpers a real string (PHP normalises the key again on insert).
            $key = (string) $key;

            $key = $case === \CASE_LOWER
                ? self::strtolower($key, $encoding)
                : self::strtoupper($key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
332
333
    /**
     * Extract the text that sits between two delimiters.
     *
     * The search for $start begins at $offset; the search for $end begins right
     * behind the found $start. An empty string is returned when either delimiter
     * is not found or when nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: same steps as below, but via the encoding-aware class helpers
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $delimiter_pos = self::strpos($str, $start, $offset, $encoding);
            if ($delimiter_pos === false) {
                return '';
            }

            $content_begin = $delimiter_pos + (int) self::strlen($start, $encoding);
            $content_end = self::strpos($str, $end, $content_begin, $encoding);
            if ($content_end === false || $content_end === $content_begin) {
                return '';
            }

            return (string) self::substr(
                $str,
                $content_begin,
                $content_end - $content_begin,
                $encoding
            );
        }

        // UTF-8 path: call mbstring directly
        $delimiter_pos = \mb_strpos($str, $start, $offset);
        if ($delimiter_pos === false) {
            return '';
        }

        $content_begin = $delimiter_pos + (int) \mb_strlen($start);
        $content_end = \mb_strpos($str, $end, $content_begin);
        if ($content_end === false || $content_end === $content_begin) {
            return '';
        }

        return (string) \mb_substr($str, $content_begin, $content_end - $content_begin);
    }
398
399
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * The input is decoded byte by byte, so its length is not limited by integer
     * or float precision. Leading zero bits are ignored and the remaining bits are
     * left-padded with zeros to a whole number of bytes.
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded bytes; an empty string for empty, non-string or all-zero input.</p>
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // base_convert() computed its result as if invalid characters did not exist; keep that.
        $bits = (string) \preg_replace('/[^01]/', '', $bin);

        // A number has no leading zeros, and a value of zero has always produced ''.
        $bits = \ltrim($bits, '0');
        if ($bits === '') {
            return '';
        }

        // Align to whole bytes so that every 8-bit group maps to exactly one output byte.
        $missing_bits = (8 - \strlen($bits) % 8) % 8;
        if ($missing_bits > 0) {
            $bits = \str_repeat('0', $missing_bits) . $bits;
        }

        $str = '';
        foreach (\str_split($bits, 8) as $byte_bits) {
            $str .= \chr((int) \bindec($byte_bits));
        }

        return $str;
    }
425
426
    /**
     * Returns the UTF-8 Byte Order Mark Character.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark.</p>
     */
    public static function bom(): string
    {
        // the three bytes EF BB BF
        return "\xef\xbb\xbf";
    }
442
443
    /**
     * Alias of UTF8::chr_map(): both arguments are passed through unchanged
     * and the result of UTF8::chr_map() is returned as-is.
     *
     * @param callable $callback
     * @param string   $str
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
459
460
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * Unlike UTF8::access() there is no guard for negative indexes here; the
     * index is handed to the substring function as given.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if ($encoding !== 'UTF-8') {
            // other encodings are handled by the class' own substr()
            return (string) self::substr($str, $index, 1, $encoding);
        }

        return (string) \mb_substr($str, $index, 1);
    }
480
481
    /**
     * Split the string into its characters.
     *
     * This is a thin wrapper around UTF8::str_split() called with the string only.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        // NOTE(review): static analysis reports that UTF8::str_split() is documented to
        // possibly return nested arrays; the annotation below narrows that for string input.
        /** @var string[] $chars */
        $chars = self::str_split($str);

        return $chars;
    }
496
497
    /**
     * Detect once which optional extensions / features are available and store
     * the results in self::$SUPPORT.
     *
     * @return true|null
     *                   <p>true when the detection was performed by this call, null when it had already been done.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // the marker key is written by the first run, later calls stop here
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();

        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // make "mb_" functions and mb regex work on UTF-8 by default
            \mb_internal_encoding('UTF-8');
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            // the polyfill also provides mb_internal_encoding()
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
551
552
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * Results are memoized per code point and encoding. Only code points below
     * 0x80 are taken directly from the single-byte table; everything else is
     * built via IntlChar (if available) or by hand as a multi-byte sequence.
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null on failure or empty input
     *                     (non-integer input, a code point &lt;= 0 or a code point above 0x10FFFF).</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // The parameter is not type-hinted, so the runtime check is still needed.
        // Values above 0x10FFFF are outside of Unicode and cannot be encoded.
        if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Only for "simple"-chars: U+0080 already needs two bytes in UTF-8, so it must
        // not be read from the single-byte table (that would yield the lone byte 0x80).
        if ($code_point < 0x80) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $chr = \IntlChar::chr($code_point);
            if ($chr === null) {
                // IntlChar could not build the character: report failure instead of encoding null
                return null;
            }

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // self::$CHR is used as a byte table here: lead byte marker + payload bits,
        // followed by continuation bytes (0x80 + 6 payload bits each).
        if ($code_point <= 0x7FF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
675
676
    /**
     * Run a callback on every character of a string and collect the results.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        // the string is split via UTF8::str_split(), then each piece is handed to the callback
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
696
697
    /**
     * Generates an array of byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An array of byte lengths of each character.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // pick the byte counter first, then apply it to every character
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            $byte_length = static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_length = '\strlen';
        }

        return \array_map($byte_length, self::str_split($str));
    }
732
733
    /**
     * Get a decimal code representation of a specific character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * iconv is used when it is available and the conversion succeeds; otherwise
     * the UTF-8 sequence is decoded by hand from its lead byte and continuation bytes.
     *
     * @param string $char <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point of the first character, 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // Without this guard an empty string ends in unpack() on empty data (which
        // cannot produce the promised int) or in reading $char[0] of an empty string.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                /** @phpstan-ignore-next-line - "unpack": only false if the format string contains errors */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // the lead byte tells the sequence length; strip its marker bits to keep the payload
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // every continuation byte contributes its lower 6 bits
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
785
786
    /**
     * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional] <p>The prefix that is passed on to UTF8::int_to_hex().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // The HTML entity for the NUL character is handled like an empty character.
        $character = $char === '&#0;' ? '' : (string) $char;

        $code_point = self::ord($character);

        return self::int_to_hex($code_point, $prefix);
    }
811
812
    /**
     * Splits a string into smaller chunks and joins them with the specified line ending character(s).
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        // Cut the string into pieces of at most $chunk_length characters ...
        $chunks = self::str_split($body, $chunk_length);

        // ... and glue them back together with the line ending in between.
        return \implode($end, $chunks);
    }
830
831
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
     *                                                        Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
     *                                                        in combination with $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
     *                                                        question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
     *                                                        invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                                        </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 encoded string.</p>
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences, the other two groups
        // match single stray bytes; replacing every match by "$1" keeps only
        // the well-formed part.
        $utf8_filter = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $str = (string) \preg_replace($utf8_filter, '$1', $str);

        // The optional clean-up steps run in a fixed order.

        if ($replace_diamond_question_mark) {
            $str = self::replace_diamond_question_mark($str);
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom) {
            $str = self::remove_bom($str);
        }

        return $str;
    }
910
911
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The cleaned string, an empty string for an empty input.</p>
     */
    public static function cleanup($str): string
    {
        // The parameter is untyped, so force a string first.
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // Fix ISO <-> UTF-8 errors first, then:
        // - remove all non UTF-8 symbols
        // - remove the BOM
        // - normalize whitespace chars (but keep non-breaking-spaces)
        // - remove the diamond question mark (�)
        // - remove invisible characters (e.g. "\0"), which is the default of "clean()"
        return self::clean(
            self::fix_simple_utf8($input),
            true,  // $remove_bom
            true,  // $normalize_whitespace
            false, // $normalize_msword
            true,  // $keep_non_breaking_space
            true   // $replace_diamond_question_mark
        );
    }
948
949
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
     *                                     default, code points will be returned as integers.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A plain string is split into its characters, anything else is used as given.
        $characters = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($characters) || $characters === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $characters);

        if (!$use_u_style) {
            return $code_points;
        }

        // Convert every integer code point into its "U+xxxx" notation.
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1010
1011
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        // Use the "mb_" regex engine when available, the internal helper otherwise.
        $collapsed = self::$SUPPORT['mbstring'] === true
            ? (string) \mb_ereg_replace('[[:space:]]+', ' ', $str)
            : self::regex_replace($str, '[[:space:]]+', ' ');

        return \trim($collapsed);
    }
1031
1032
    /**
     * Returns count of characters used in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Passed on to UTF8::str_split(); set to false, if you
     *                                        don't want to use "mb_" functions (presumably - see UTF8::str_split()).</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // One array entry per character ...
        $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // ... so counting equal values yields the count per character.
        return \array_count_values($characters);
    }
1061
1062
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter     <p>Map of "search => replacement" strings that is applied to the identifier.</p>
     * @param bool     $strip_tags <p>Set to true, to run "strip_tags()" on the identifier.</p>
     * @param bool     $strtolower <p>Set to false, to skip the "strtolower()" call.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @phpstan-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank input gets an auto-generated unique identifier,
        // every other input is cleaned from invalid UTF-8 first.
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $protected_underscores = 0;
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $protected_underscores);
        }

        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Replace the temporary placeholder '##' with '__' only if the original
        // identifier contained '__'.
        if ($protected_underscores > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Valid characters in a CSS identifier are:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 and higher
        // We strip out any character not in the above list.
        $str = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $str);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $str = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $str);

        return \trim($str, '-');
    }
1136
1137
    /**
     * Removes css media-queries (the "@media ... { ... }" blocks) from a css string.
     *
     * @param string $str <p>The css input.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The css without the matched media-query blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // Matches "@media", the media type / condition and the whole block
        // up to its closing brace; the "U" modifier makes the match ungreedy.
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        return (string) $without_media_queries;
    }
1154
1155
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function ctype_loaded(): bool
    {
        $is_available = \extension_loaded('ctype');

        return $is_available;
    }
1169
1170
    /**
     * Converts an int value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_decimal()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int <p>The decimal code point.</p>
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The result of decoding the numeric html entity of the code point.</p>
     */
    public static function decimal_to_chr($int): string
    {
        // Build the numeric html entity (e.g. "&#931;") and let the entity decoder do the work.
        $numeric_entity = '&#' . $int . ';';

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
1189
1190
    /**
     * Decodes a MIME header field.
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are used as they are, every other name gets normalized.
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1211
1212
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * EXAMPLE: <code>UTF8::emoji_from_country_code('DE'); // '🇩🇪'</code>
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE (case-insensitive)</p>
     *
     * @return string
     *                <p>Emoji or empty string on error, i.e. if the input does not consist of exactly two ASCII letters.</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only two ASCII letters can be mapped onto regional indicator symbols.
        // Everything else (empty string, wrong length, digits, punctuation or
        // multi-byte characters, which would be indexed byte-wise below) is an
        // error and would otherwise produce arbitrary code points.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        // "A" (0x41) maps onto the first regional indicator symbol (U+1F1E6),
        // "B" onto the next one and so on.
        $flagOffset = 0x1F1E6;
        $asciiOffset = 0x41;

        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1240
1241
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Load the emoji mappings on first use.
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the placeholder list that matches the encoding mode ...
        $placeholders = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        // ... and swap every placeholder back to its emoji.
        return (string) \str_replace(
            $placeholders,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1284
1285
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Load the emoji mappings on first use.
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the placeholder list that matches the encoding mode ...
        $placeholders = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        // ... and swap every emoji for its placeholder.
        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            $placeholders,
            $str
        );
    }
1328
1329
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the encoding of the input is auto-detected
     *                                              and a detected encoding replaces $from_encoding; if nothing
     *                                              can be detected, the input is converted via UTF8::to_utf8().<br>
     *                                              When false, the auto-detection is only used if $from_encoding
     *                                              is empty.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // Nothing to convert or no target encoding given.
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // "UTF-8" and "CP850" are used as they are, every other name gets normalized.
        if (!\in_array($to_encoding, ['UTF-8', 'CP850'], true)) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && !\in_array($from_encoding, ['UTF-8', 'CP850'], true)) {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Source and target encoding are both known and identical.
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // Pseudo encodings: a matching target returns immediately, a matching
        // source is decoded first and the real source encoding is detected later on.

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // Detect the source encoding if requested or if it is (still) unknown.
        $detected_encoding = false;
        if ($auto_detect_the_from_encoding || !$from_encoding) {
            $detected_encoding = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding !== false) {
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        // Unknown source encoding or nothing to do.
        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // Dedicated converters for the common single-byte <-> UTF-8 cases.

        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // Every other target encoding needs "mbstring" (with "iconv" as last resort).
        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            if ($converted) {
                \assert(\is_string($converted));

                return $converted;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $converted_by_iconv = @\iconv($from_encoding, $to_encoding, $str);
        if ($converted_by_iconv !== false) {
            return $converted_by_iconv;
        }

        // Every conversion failed, hand back the (possibly decoded) input.
        return $str;
    }
1504
1505
    /**
     * Encodes a string as a MIME header field (with an empty field name).
     *
     * @param string $str               <p>The header value to encode.</p>
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
     * @param int    $indent            [optional] <p>Set the max line length (used as "line-length").</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as they are, every other name gets normalized.
        if (!\in_array($from_charset, ['UTF-8', 'CP850'], true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, ['UTF-8', 'CP850'], true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $preferences);
    }
1548
1549
    /**
     * Build an excerpt of a text and try to keep the searched string inside the returned part.
     *
     * The text is only cut at a space or a dot. Every part that was skipped is marked with
     * $replacer_for_skipped_text. With an empty $search the excerpt is taken from the
     * beginning of the text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === half of the text length (rounded)</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The excerpt, or the unchanged input if no cut position was found.</p>
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // Encoding-aware helpers: plain "mb_*" calls for UTF-8, the class wrappers for everything else.
        $count_chars = static function (string $text) use ($is_utf8, $encoding): int {
            if ($is_utf8) {
                return (int) \mb_strlen($text);
            }

            return (int) self::strlen($text, $encoding);
        };

        $slice = static function (string $text, int $start, int $chars) use ($is_utf8, $encoding) {
            if ($is_utf8) {
                return \mb_substr($text, $start, $chars);
            }

            return self::substr($text, $start, $chars, $encoding);
        };

        // Smaller one of "position of the next space" / "position of the next dot", searched from $from.
        $next_break = static function (string $text, int $from) use ($is_utf8, $encoding): int {
            if ($is_utf8) {
                return (int) \min(
                    \mb_strpos($text, ' ', $from),
                    \mb_strpos($text, '.', $from)
                );
            }

            return (int) \min(
                self::strpos($text, ' ', $from, $encoding),
                self::strpos($text, '.', $from, $encoding)
            );
        };

        // Bigger one of "position of the last space" / "position of the last dot".
        $last_break = static function (string $text) use ($is_utf8, $encoding): int {
            if ($is_utf8) {
                return (int) \max(
                    \mb_strrpos($text, ' '),
                    \mb_strrpos($text, '.')
                );
            }

            return (int) \max(
                self::strrpos($text, ' ', 0, $encoding),
                self::strrpos($text, '.', 0, $encoding)
            );
        };

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // --- no search string: cut from the start of the text ---

        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                // never start searching behind the end of the string
                $end = \min($length - 1, $count_chars($str));
            }

            $pos = $next_break($str, $end);
            if (!$pos) {
                return $str;
            }

            $str_sub = $slice($str, 0, $pos);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // --- search string given: try to center the excerpt around it ---

        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
        }
        $half_side = (int) ($word_position - $length / 2 + $count_chars($search) / 2);

        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $slice($str, 0, $half_side);
            if ($half_text !== false) {
                $pos_start = $last_break($half_text);
            }
        }

        $cut_at_start = $word_position && $half_side > 0;

        $offset = $cut_at_start ? $pos_start + $length - 1 : $length - 1;
        $total_length = (int) self::strlen($str, $encoding);
        if ($offset > $total_length) {
            $offset = $total_length;
        }

        if (!$cut_at_start) {
            // the excerpt starts at the beginning of the text, only the tail can be skipped
            $pos_end = $next_break($str, $offset);
            if (!$pos_end) {
                return $str;
            }

            $str_sub = $slice($str, 0, $pos_end);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        $pos_end = $next_break($str, $offset) - $pos_start;

        if ($pos_end <= 0) {
            // no cut position behind the search string: keep everything up to the end
            $str_sub = $slice($str, $pos_start, $count_chars($str));
            if ($str_sub === false) {
                return '';
            }

            return $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
        }

        $str_sub = $slice($str, $pos_start, $pos_end);
        if ($str_sub === false) {
            return '';
        }

        return $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }
1738
1739
    /**
     * Reads an entire file into a string and (optionally) converts the content to UTF-8.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read, it is sanitized before usage.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed through to the native function as "use_include_path".
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with stream_context_create.
     *                                        With null (and a non-zero $timeout) a context that only
     *                                        contains the http-timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts, null means 0.
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
        $filename = Bootup::filter_sanitize_string_polyfill($filename);
        if ($filename === false) {
            return false;
        }

        // Only build our own stream-context if the caller did not provide one.
        if ($timeout && $context === null) {
            $http_options = ['timeout' => $timeout];
            $context = \stream_context_create(['http' => $http_options]);
        }

        // The length argument is only handed over if it was really given.
        $arguments = [$filename, $use_include_path, $context, $offset ?? 0];
        if ($max_length !== null) {
            $arguments[] = $max_length;
        }

        $data = \file_get_contents(...$arguments);
        if ($data === false) {
            // reading failed
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // Binary content stays untouched, unless it is detected as UTF-16 / UTF-32.
        $keep_as_binary = self::is_binary($data, true)
                          &&
                          self::is_utf16($data, false) === false
                          &&
                          self::is_utf32($data, false) === false;
        if ($keep_as_binary) {
            return $data;
        }

        $data = self::encode('UTF-8', $data, false, $from_encoding);

        return self::cleanup($data);
    }
1839
1840
    /**
     * Checks if the content of a file starts with a BOM (Byte Order Mark).
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if the file could not be read (file_get_contents() returned false)
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            // the BOM detection itself is done on the loaded string
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1863
1864
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively, strings are normalized and every
     * other type is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var
     * @param int                 $normalization_form <p>One of the \Normalizer::* forms.</p>
     * @param string              $leading_combining  <p>Prefix for strings that start with a combining mark.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        if (\is_object($var) || \is_array($var)) {
            // containers: filter every element in place
            foreach ($var as &$element) {
                $element = self::filter($element, $normalization_form, $leading_combining);
            }
            unset($element);
        } elseif (\is_string($var)) {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // pure ASCII needs no normalization at all
            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // placeholder, so that the check for leading combining chars still runs
                    $normalized = '-';
                } else {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    if ($normalized && isset($normalized[0])) {
                        $var = $normalized;
                    } else {
                        // normalization gave no usable result, so re-encode the input
                        $var = self::encode('UTF-8', $var);
                    }
                }

                \assert(\is_string($var));

                $starts_with_combining_char = $normalized
                                              &&
                                              $var[0] >= "\x80"
                                              &&
                                              isset($normalized[0], $leading_combining[0])
                                              &&
                                              \preg_match('/^\\p{Mn}/u', $var);

                if ($starts_with_combining_char) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1941
1942
    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it and
     * passes the result through UTF8::filter().
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW)); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      <b>NULL</b> is handled like "not given".
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_options = $options !== null && \func_num_args() >= 4;

        // the options are only handed over to the native function if they were really given
        $value = $has_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($value);
    }
1997
1998
    /**
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables, optionally filters them and passes the result through UTF8::filter().
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => 'FILTER_UNSAFE_RAW')); // array('bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int        $type       <p>
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                               <b>INPUT_ENV</b>.
     *                               </p>
     * @param array|null $definition [optional] <p>
     *                               An array defining the arguments. A valid key is a string
     *                               containing a variable name and a valid value is either a filter type, or an array
     *                               optionally specifying the filter, flags and options. If the value is an
     *                               array, valid keys are "filter" (the filter type), "flags" (flags that apply to
     *                               the filter) and "options" (options that apply to the filter).
     *                               </p>
     *                               <p>
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
     *                               input array are filtered by this filter.
     *                               </p>
     *                               <p>
     *                               <b>NULL</b> is handled like "not given".
     *                               </p>
     * @param bool       $add_empty  [optional] <p>
     *                               Add missing keys as <b>NULL</b> to the return value.
     *                               </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_definition = $definition !== null && \func_num_args() >= 2;

        // without a definition the native defaults are used
        $values = $has_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
2059
2060
    /**
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter and passes the result through UTF8::filter().
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        <b>NULL</b> is handled like "not given".
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                        'options' => array(
     *                                        'default' => 3, // value to return if the filter fails
     *                                        'min_range' => 0
     *                                        ),
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback filter, 'foo' is the name of a function
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            // An explicit null must not reach the native function: its third parameter
            // only accepts array|int (deprecated since PHP 8.1, TypeError with strict types).
            // Leaving it out uses the native default, same as in filter_input().
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2139
2140
    /**
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables, optionally filters them and passes the result through UTF8::filter().
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'user@example.com'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'user@example.com']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<mixed>   $data       <p>
     *                                   An array with string keys containing the data to filter.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a
     *                                   filter type, or an
     *                                   array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are "filter" (the filter type),
     *                                   "flags" (flags that apply to the filter) and "options"
     *                                   (options that apply to the filter).
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all values
     *                                   in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   <b>NULL</b> is handled like "not given".
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set.
     *               </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition === null || \func_num_args() < 2) {
            // An explicit null must not reach the native function: its second parameter
            // only accepts array|int. Leaving it out uses the native default filter,
            // same as in filter_input_array().
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        return self::filter($a);
    }
2210
2211
    /**
     * Tells whether the "finfo" class is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the class exists, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function finfo_loaded(): bool
    {
        $finfo_available = \class_exists('finfo');

        return $finfo_available;
    }
2225
2226
    /**
     * Get the leading $n characters of a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the beginning.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The leading characters, or an empty string if $str is empty or $n is not positive.</p>
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to extract from an empty string or for a non-positive count
        if ($n <= 0 || $str === '') {
            return '';
        }

        // every other encoding is delegated to the class' own "substr()"
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, 0, $n, $encoding);
        }

        // fast path for UTF-8
        return (string) \mb_substr($str, 0, $n);
    }
2252
2253
    /**
     * Check that the number of characters in the string does not exceed the given limit.
     *
     * EXAMPLE: <code>
     * UTF8::fits_inside('κόσμε', 6); // true (5 characters)
     * UTF8::fits_inside('κόσμε', 4); // false
     * </code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        // length as reported by the class' own "strlen()", forced to an integer
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2270
2271
    /**
     * Repair simple cases of broken UTF-8 via a fixed search / replace table.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('Düsseldorf'); // 'Düsseldorf'</code>
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * Search strings (the keys of the "utf8_fix" table), built once per request.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $SEARCH_CACHE = null;

        /**
         * Replacements (the "utf8_fix" table itself), built once per request.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $REPLACE_CACHE = null;

        if ($SEARCH_CACHE === null) {
            // lazy-load the shared table the first time it is needed
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
            $SEARCH_CACHE = \array_keys($REPLACE_CACHE);
        }

        \assert(\is_array($REPLACE_CACHE));

        // "str_replace()" pairs the search and replace entries by position
        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
2321
2322
    /**
     * Repair a string that was UTF-8 encoded twice (or even more often).
     *
     * EXAMPLE: <code>UTF8::fix_utf8('FÃÂ©dÃ©ration'); // 'Fédération'</code>
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>Will return the fixed input-"array" or
     *                         the fixed input-"string".</p>
     *
     * @template TFixUtf8
     * @phpstan-param TFixUtf8 $str
     * @phpstan-return TFixUtf8
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // fix every element recursively, the keys stay untouched
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // decode + re-encode until the string does not change anymore
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2370
2371
    /**
     * Get the text direction of a specific character.
     *
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * @param string $char
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            $direction_code = \IntlChar::charDirection($char);

            // from "IntlChar"-Class
            $ltr_codes = [0, 11, 12, 20];
            $rtl_codes = [1, 13, 14, 15, 21];

            if (\in_array($direction_code, $ltr_codes, true)) {
                return 'LTR';
            }

            if (\in_array($direction_code, $rtl_codes, true)) {
                return 'RTL';
            }

            // any other code falls through to the manual lookup below
        }

        $c = static::chr_to_decimal($char);

        // everything outside of 0x5be - 0x10b7f is handled as "LTR"
        if (!($c >= 0x5be) || !($c <= 0x10b7f)) {
            return 'LTR';
        }

        if ($c <= 0x85e) {
            $rtl_single_code_points = [
                0x5be, 0x5c0, 0x5c3, 0x5c6,
                0x608, 0x60b, 0x60d, 0x61b,
                0x710, 0x7b1, 0x7fa,
                0x81a, 0x824, 0x828, 0x85e,
            ];
            $rtl_code_point_ranges = [
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                [0x800, 0x815],
                [0x830, 0x83e],
                [0x840, 0x858],
            ];
        } elseif ($c === 0x200f) {
            return 'RTL';
        } elseif ($c >= 0xfb1d) {
            $rtl_single_code_points = [
                0xfb1d, 0xfb3e,
                0x10808, 0x1083c, 0x1093f, 0x10a00,
            ];
            $rtl_code_point_ranges = [
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ];
        } else {
            // between the two tables and not 0x200f
            return 'LTR';
        }

        if (\in_array($c, $rtl_single_code_points, true)) {
            return 'RTL';
        }

        foreach ($rtl_code_point_ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2488
2489
    /**
     * Get the detected php-support information.
     *
     * @param string|null $key
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return the stored value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // lazy-load the transliterator list the first time a single key is requested
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        return self::$SUPPORT;
    }
2515
2516
    /**
     * Detect the file type from the first two bytes of the given binary string.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with this keys: 'ext', 'mime', 'type'; $fallback is returned
     *                         whenever the type is not recognized</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        /** @var false|string $leading_bytes - needed for PhpStan (stubs error) */
        $leading_bytes = \substr($str, 0, 2);
        if ($leading_bytes === false || \strlen($leading_bytes) !== 2) {
            return $fallback;
        }

        // both bytes as unsigned integers: "chars1" and "chars2"
        $byte_values = \unpack('C2chars', $leading_bytes);
        if ($byte_values === false) {
            return $fallback;
        }

        // the decimal values of both bytes, written one after the other
        $type_code = (int) ($byte_values['chars1'] . $byte_values['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_types[$type_code] ?? $fallback;
    }
2592
2593
    /**
     * Build a random string out of the given characters.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The random string, or an empty string if $possible_chars contains no characters.</p>
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // the plain "mb_" functions are only used if "UTF-8" was requested literally
        $use_mb_functions = $encoding === 'UTF-8';

        if ($use_mb_functions) {
            $chars_count = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $chars_count = (int) self::strlen($possible_chars, $encoding);
        }

        if ($chars_count === 0) {
            return '';
        }

        $last_index = $chars_count - 1;
        $random_string = '';
        $generated = 0;

        //
        // add random chars
        //

        while ($generated < $length) {
            try {
                $random_index = \random_int(0, $last_index);
            } catch (\Exception $e) {
                // fallback if no sufficient source of randomness is available
                $random_index = \mt_rand(0, $last_index);
            }

            if ($use_mb_functions) {
                $char = \mb_substr($possible_chars, $random_index, 1);
            } else {
                $char = self::substr($possible_chars, $random_index, 1, $encoding);
            }

            // only successfully extracted chars are counted
            if ($char !== false) {
                $random_string .= $char;
                ++$generated;
            }
        }

        return $random_string;
    }
2655
2656
    /**
     * Build a unique string from a random number, the session-id, the remote / server address,
     * the given extra entropy and "uniqid()".
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_number = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            // fallback if no sufficient source of randomness is available
            $random_number = \mt_rand(0, \mt_getrandmax());
        }

        $entropy_parts = [
            $random_number,
            \session_id(),
            $_SERVER['REMOTE_ADDR'] ?? '',
            $_SERVER['SERVER_ADDR'] ?? '',
            $extra_entropy,
        ];
        $entropy = \implode('', $entropy_parts);

        // the collected entropy is used as prefix for "uniqid()"
        $unique_id = \uniqid($entropy, true);

        if (!$use_md5) {
            return $unique_id;
        }

        return \md5($unique_id . $entropy);
    }
2684
2685
    /**
     * Check whether the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without "mbstring" the class' own pattern matching is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2703
2704
    /**
     * Check whether the string contains at least one whitespace character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        $pattern = '.*[[:space:]]';

        // without "mbstring" the class' own pattern matching is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2722
2723
    /**
     * Check whether the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without "mbstring" the class' own pattern matching is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
2741
2742
    /**
     * Convert a hexadecimal code point into the matching UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2760
2761
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * @param string $hexdec <p>The hexadecimal code point representation: 4 to 6 hexadecimal digits,
     *                       optionally prefixed with "\u" or "U+".</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure (empty input, wrong length or
     *                   non-hexadecimal characters).</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hexadecimal digits are accepted: other letters would be parsed by
        // "intval()" as 0 (or as a truncated number) instead of being reported as failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2790
2791
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with "keep ascii" only code points from 0x80 upwards are converted
            $start_code = $keep_ascii_chars ? 0x80 : 0x00;

            // convmap: [first code point, last code point, offset, bit-mask]
            //
            // The range ends at 0x10ffff (the last Unicode code point) and the mask keeps
            // all 21 bits, so code points above 0xfffff are converted as well.
            $convmap = [$start_code, 0x10ffff, 0, 0x1fffff];

            // the encoding is always passed explicitly, so the result does not depend
            // on the current "mbstring" internal encoding
            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2863
2864
    /**
     * UTF-8 version of html_entity_decode()
     *
     * Plain html_entity_decode() is not used on its own, because it does not convert
     * entities that lack the closing semicolon, while most browsers still interpret
     * such entities correctly. Therefore the missing semicolon is added to numeric
     * entities first, and the decoding is repeated until the string stays the same.
     *
     * Convert all HTML entities to their applicable characters.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string   $str      <p>
     *                           The input string.
     *                           </p>
     * @param int|null $flags    [optional] <p>
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
     *                           and which document type to use. If null is given, ENT_QUOTES | ENT_HTML5 is used.
     *                           <ul>
     *                           <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
     *                           <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                           <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
     *                           <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                           <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                           <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                           <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                           </ul>
     *                           </p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // shorter than four bytes (examples: &; || &x;) or no "&" at all: nothing to decode
        $has_no_entity = !isset($str[3]) || \strpos($str, '&') === false;
        if ($has_no_entity) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        $handled_without_mbstring = \in_array(
            $encoding,
            ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'],
            true
        );
        if (!$handled_without_mbstring && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // decode again and again, until nothing changes or no "&" is left
        $previous = null;
        while ($previous !== $str && \strpos($str, '&') !== false) {
            $previous = $str;

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities
                // (the missing ";" is added to numeric entities first)
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        }

        return $str;
    }
2996
2997
    /**
     * Escape a string for html output via "UTF8::htmlspecialchars()",
     * called with the flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3015
3016
    /**
     * Remove empty html-tag.
     *
     * e.g.: <pre><tag></tag></pre>
     *
     * An opening tag that is directly followed (optionally separated by whitespace)
     * by a closing tag is removed together with that closing tag.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without empty tags, or the unchanged input if the
     *                regular expression could not be applied.</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $result = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // "preg_replace()" returns null on failure (e.g. invalid UTF-8 in combination
        // with the "u"-modifier); keep the input then instead of collapsing it to ''.
        return $result ?? $str;
    }
3035
3036
    /**
3037
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3038
     *
3039
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3040
     *
3041
     * @see http://php.net/manual/en/function.htmlentities.php
3042
     *
3043
     * @param string $str           <p>
3044
     *                              The input string.
3045
     *                              </p>
3046
     * @param int    $flags         [optional] <p>
3047
     *                              A bitmask of one or more of the following flags, which specify how to handle
3048
     *                              quotes, invalid code unit sequences and the used document type. The default is
3049
     *                              ENT_COMPAT | ENT_HTML401.
3050
     *                              <table>
3051
     *                              Available <i>flags</i> constants
3052
     *                              <tr valign="top">
3053
     *                              <td>Constant Name</td>
3054
     *                              <td>Description</td>
3055
     *                              </tr>
3056
     *                              <tr valign="top">
3057
     *                              <td><b>ENT_COMPAT</b></td>
3058
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3059
     *                              </tr>
3060
     *                              <tr valign="top">
3061
     *                              <td><b>ENT_QUOTES</b></td>
3062
     *                              <td>Will convert both double and single quotes.</td>
3063
     *                              </tr>
3064
     *                              <tr valign="top">
3065
     *                              <td><b>ENT_NOQUOTES</b></td>
3066
     *                              <td>Will leave both double and single quotes unconverted.</td>
3067
     *                              </tr>
3068
     *                              <tr valign="top">
3069
     *                              <td><b>ENT_IGNORE</b></td>
3070
     *                              <td>
3071
     *                              Silently discard invalid code unit sequences instead of returning
3072
     *                              an empty string. Using this flag is discouraged as it
3073
     *                              may have security implications.
3074
     *                              </td>
3075
     *                              </tr>
3076
     *                              <tr valign="top">
3077
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3078
     *                              <td>
3079
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3080
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3081
     *                              string.
3082
     *                              </td>
3083
     *                              </tr>
3084
     *                              <tr valign="top">
3085
     *                              <td><b>ENT_DISALLOWED</b></td>
3086
     *                              <td>
3087
     *                              Replace invalid code points for the given document type with a
3088
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3089
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3090
     *                              instance, to ensure the well-formedness of XML documents with
3091
     *                              embedded external content.
3092
     *                              </td>
3093
     *                              </tr>
3094
     *                              <tr valign="top">
3095
     *                              <td><b>ENT_HTML401</b></td>
3096
     *                              <td>
3097
     *                              Handle code as HTML 4.01.
3098
     *                              </td>
3099
     *                              </tr>
3100
     *                              <tr valign="top">
3101
     *                              <td><b>ENT_XML1</b></td>
3102
     *                              <td>
3103
     *                              Handle code as XML 1.
3104
     *                              </td>
3105
     *                              </tr>
3106
     *                              <tr valign="top">
3107
     *                              <td><b>ENT_XHTML</b></td>
3108
     *                              <td>
3109
     *                              Handle code as XHTML.
3110
     *                              </td>
3111
     *                              </tr>
3112
     *                              <tr valign="top">
3113
     *                              <td><b>ENT_HTML5</b></td>
3114
     *                              <td>
3115
     *                              Handle code as HTML 5.
3116
     *                              </td>
3117
     *                              </tr>
3118
     *                              </table>
3119
     *                              </p>
3120
     * @param string $encoding      [optional] <p>
3121
     *                              Like <b>htmlspecialchars</b>,
3122
     *                              <b>htmlentities</b> takes an optional third argument
3123
     *                              <i>encoding</i> which defines encoding used in
3124
     *                              conversion.
3125
     *                              Although this argument is technically optional, you are highly
3126
     *                              encouraged to specify the correct value for your code.
3127
     *                              </p>
3128
     * @param bool   $double_encode [optional] <p>
3129
     *                              When <i>double_encode</i> is turned off PHP will not
3130
     *                              encode existing html entities. The default is to convert everything.
3131
     *                              </p>
3132
     *
3133
     * @psalm-pure
3134
     *
3135
     * @return string
3136
     *                <p>
3137
     *                The encoded string.
3138
     *                <br><br>
3139
     *                If the input <i>string</i> contains an invalid code unit
3140
     *                sequence within the given <i>encoding</i> an empty string
3141
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3142
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3143
     *                </p>
3144
     */
3145 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as given; any other encoding
        // name is normalized first (falling back to "UTF-8").
        $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /**
         * PHP's htmlentities() does not convert a backslash into its html entity,
         * so the backslash is replaced by its numeric entity ("&#92;") here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        // Final pass through html_encode() using the same encoding.
        return self::html_encode($encoded, true, $encoding);
    }
3174
3175
    /**
3176
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3177
     *
3178
     * INFO: Take a look at "UTF8::htmlentities()"
3179
     *
3180
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3181
     *
3182
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3183
     *
3184
     * @param string $str           <p>
3185
     *                              The string being converted.
3186
     *                              </p>
3187
     * @param int    $flags         [optional] <p>
3188
     *                              A bitmask of one or more of the following flags, which specify how to handle
3189
     *                              quotes, invalid code unit sequences and the used document type. The default is
3190
     *                              ENT_COMPAT | ENT_HTML401.
3191
     *                              <table>
3192
     *                              Available <i>flags</i> constants
3193
     *                              <tr valign="top">
3194
     *                              <td>Constant Name</td>
3195
     *                              <td>Description</td>
3196
     *                              </tr>
3197
     *                              <tr valign="top">
3198
     *                              <td><b>ENT_COMPAT</b></td>
3199
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3200
     *                              </tr>
3201
     *                              <tr valign="top">
3202
     *                              <td><b>ENT_QUOTES</b></td>
3203
     *                              <td>Will convert both double and single quotes.</td>
3204
     *                              </tr>
3205
     *                              <tr valign="top">
3206
     *                              <td><b>ENT_NOQUOTES</b></td>
3207
     *                              <td>Will leave both double and single quotes unconverted.</td>
3208
     *                              </tr>
3209
     *                              <tr valign="top">
3210
     *                              <td><b>ENT_IGNORE</b></td>
3211
     *                              <td>
3212
     *                              Silently discard invalid code unit sequences instead of returning
3213
     *                              an empty string. Using this flag is discouraged as it
3214
     *                              may have security implications.
3215
     *                              </td>
3216
     *                              </tr>
3217
     *                              <tr valign="top">
3218
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3219
     *                              <td>
3220
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3221
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3222
     *                              string.
3223
     *                              </td>
3224
     *                              </tr>
3225
     *                              <tr valign="top">
3226
     *                              <td><b>ENT_DISALLOWED</b></td>
3227
     *                              <td>
3228
     *                              Replace invalid code points for the given document type with a
3229
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3230
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3231
     *                              instance, to ensure the well-formedness of XML documents with
3232
     *                              embedded external content.
3233
     *                              </td>
3234
     *                              </tr>
3235
     *                              <tr valign="top">
3236
     *                              <td><b>ENT_HTML401</b></td>
3237
     *                              <td>
3238
     *                              Handle code as HTML 4.01.
3239
     *                              </td>
3240
     *                              </tr>
3241
     *                              <tr valign="top">
3242
     *                              <td><b>ENT_XML1</b></td>
3243
     *                              <td>
3244
     *                              Handle code as XML 1.
3245
     *                              </td>
3246
     *                              </tr>
3247
     *                              <tr valign="top">
3248
     *                              <td><b>ENT_XHTML</b></td>
3249
     *                              <td>
3250
     *                              Handle code as XHTML.
3251
     *                              </td>
3252
     *                              </tr>
3253
     *                              <tr valign="top">
3254
     *                              <td><b>ENT_HTML5</b></td>
3255
     *                              <td>
3256
     *                              Handle code as HTML 5.
3257
     *                              </td>
3258
     *                              </tr>
3259
     *                              </table>
3260
     *                              </p>
3261
     * @param string $encoding      [optional] <p>
3262
     *                              Defines encoding used in conversion.
3263
     *                              </p>
3264
     *                              <p>
3265
     *                              For the purposes of this function, the encodings
3266
     *                              ISO-8859-1, ISO-8859-15,
3267
     *                              UTF-8, cp866,
3268
     *                              cp1251, cp1252, and
3269
     *                              KOI8-R are effectively equivalent, provided the
3270
     *                              <i>string</i> itself is valid for the encoding, as
3271
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3272
     *                              the same positions in all of these encodings.
3273
     *                              </p>
3274
     * @param bool   $double_encode [optional] <p>
3275
     *                              When <i>double_encode</i> is turned off PHP will not
3276
     *                              encode existing html entities, the default is to convert everything.
3277
     *                              </p>
3278
     *
3279
     * @psalm-pure
3280
     *
3281
     * @return string the converted string.
3282
     *                </p>
3283
     *                <p>
3284
     *                If the input <i>string</i> contains an invalid code unit
3285
     *                sequence within the given <i>encoding</i> an empty string
3286
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3287
     *                <b>ENT_SUBSTITUTE</b> flags are set
3288
     */
3289 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; every other encoding name is
        // normalized first (falling back to "UTF-8").
        $needs_normalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Delegate the actual conversion to PHP's native implementation.
        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
3306
3307
    /**
3308
     * Checks whether iconv is available on the server.
3309
     *
3310
     * @psalm-pure
3311
     *
3312
     * @return bool
3313
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3314
     *
3315
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3316
     */
3317
    public static function iconv_loaded(): bool
    {
        // Availability is decided solely by whether the "iconv" extension is loaded.
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3321
3322
    /**
3323
     * Converts Integer to hexadecimal U+xxxx code point representation.
3324
     *
3325
     * INFO: opposite to UTF8::hex_to_int()
3326
     *
3327
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3328
     *
3329
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3330
     * @param string $prefix [optional]
3331
     *
3332
     * @psalm-pure
3333
     *
3334
     * @return string the code point, or empty string on failure
3335
     */
3336 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Left-pad with zeros to at least four hex digits; longer values stay untouched.
        $code_point = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $code_point;
    }
3344
3345
    /**
3346
     * Checks whether intl-char is available on the server.
3347
     *
3348
     * @psalm-pure
3349
     *
3350
     * @return bool
3351
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3352
     *
3353
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3354
     */
3355
    public static function intlChar_loaded(): bool
    {
        // The check is based on the presence of the "IntlChar" class.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
3359
3360
    /**
3361
     * Checks whether intl is available on the server.
3362
     *
3363
     * @psalm-pure
3364
     *
3365
     * @return bool
3366
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3367
     *
3368
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3369
     */
3370 5
    public static function intl_loaded(): bool
    {
        // Availability is decided solely by whether the "intl" extension is loaded.
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3374
3375
    /**
3376
     * Returns true if the string contains only alphabetic chars, false otherwise.
3377
     *
3378
     * @param string $str <p>The input string.</p>
3379
     *
3380
     * @psalm-pure
3381
     *
3382
     * @return bool
3383
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3384
     */
3385 10
    public static function is_alpha(string $str): bool
    {
        // Zero or more alphabetic characters, so an empty string matches as well.
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring support, defer to the generic pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3393
3394
    /**
3395
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3396
     *
3397
     * @param string $str <p>The input string.</p>
3398
     *
3399
     * @psalm-pure
3400
     *
3401
     * @return bool
3402
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3403
     */
3404 13
    public static function is_alphanumeric(string $str): bool
    {
        // Zero or more alphanumeric characters, so an empty string matches as well.
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring support, defer to the generic pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3412
3413
    /**
3414
     * Returns true if the string contains only punctuation chars, false otherwise.
3415
     *
3416
     * @param string $str <p>The input string.</p>
3417
     *
3418
     * @psalm-pure
3419
     *
3420
     * @return bool
3421
     *              <p>Whether or not $str contains only punctuation chars.</p>
3422
     */
3423 10
    public static function is_punctuation(string $str): bool
    {
        // Zero or more punctuation characters, so an empty string matches as well.
        $only_punctuation_pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $only_punctuation_pattern);
    }
3427
3428
    /**
3429
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3430
     *
3431
     * @param string $str                       <p>The input string.</p>
3432
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
3433
     *
3434
     * @psalm-pure
3435
     *
3436
     * @return bool
3437
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3438
     */
3439 1
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        $cleaned = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        // The string is printable when the cleanup step left it unchanged.
        return $cleaned === $str;
    }
3443
3444
    /**
3445
     * Checks if a string is 7 bit ASCII.
3446
     *
3447
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3448
     *
3449
     * @param string $str <p>The string to check.</p>
3450
     *
3451
     * @psalm-pure
3452
     *
3453
     * @return bool
3454
     *              <p>
3455
     *              <strong>true</strong> if it is ASCII<br>
3456
     *              <strong>false</strong> otherwise
3457
     *              </p>
3458
     */
3459 8
    public static function is_ascii(string $str): bool
    {
        // The actual check lives in the ASCII helper class.
        $is_seven_bit = ASCII::is_ascii($str);

        return $is_seven_bit;
    }
3463
3464
    /**
3465
     * Returns true if the string is base64 encoded, false otherwise.
3466
     *
3467
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3468
     *
3469
     * @param string|null $str                   <p>The input string.</p>
3470
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3471
     *
3472
     * @psalm-pure
3473
     *
3474
     * @return bool
3475
     *              <p>Whether or not $str is base64 encoded.</p>
3476
     */
3477 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // Anything that is not a string (e.g. null) can never be base64.
        if (!\is_string($str)) {
            return false;
        }

        // The empty string only counts when the caller explicitly allows it.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // Strict decoding fails for characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round-trip check: re-encoding must reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3495
3496
    /**
3497
     * Check if the input is binary... (this looks like a hack).
3498
     *
3499
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3500
     *
3501
     * @param int|string $input
3502
     * @param bool       $strict
3503
     *
3504
     * @psalm-pure
3505
     *
3506
     * @return bool
3507
     */
3508 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;

        // An empty input is never reported as binary.
        if ($input === '') {
            return false;
        }

        // Input made up of the characters "0" and "1" only counts as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        // Ask the file-type detection helper about the raw data.
        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // Heuristic: more than a quarter of all bytes are NUL bytes.
        $byte_count = \strlen($input);
        $null_byte_count = \substr_count($input, "\x0", 0, $byte_count);
        if (($null_byte_count / $byte_count) > 0.25) {
            return true;
        }

        // The remaining check only runs in strict mode.
        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        // Strict mode: fileinfo reports the MIME encoding as "binary".
        return $detected_encoding && $detected_encoding === 'binary';
    }
3546
3547
    /**
3548
     * Check if the file is binary.
3549
     *
3550
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3551
     *
3552
     * @param string $file
3553
     *
3554
     * @return bool
3555
     */
3556 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');

        // Only the first 512 bytes of the file are inspected.
        $head = '';
        if (\is_resource($handle)) {
            $head = \fread($handle, 512);
            \fclose($handle);
        }

        // Files that could not be opened/read, or that are empty, are reported as non-binary.
        if ($head === false || $head === '') {
            return false;
        }

        // The sampled block is checked in strict mode.
        return self::is_binary($head, true);
    }
3573
3574
    /**
3575
     * Returns true if the string contains only whitespace chars, false otherwise.
3576
     *
3577
     * @param string $str <p>The input string.</p>
3578
     *
3579
     * @psalm-pure
3580
     *
3581
     * @return bool
3582
     *              <p>Whether or not $str contains only whitespace characters.</p>
3583
     */
3584 15
    public static function is_blank(string $str): bool
    {
        // Zero or more whitespace characters, so an empty string matches as well.
        $pattern = '^[[:space:]]*$';

        // Without mbstring support, defer to the generic pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3592
3593
    /**
3594
     * Checks if the given string is equal to any "Byte Order Mark".
3595
     *
3596
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3597
     *
3598
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3599
     *
3600
     * @param string $str <p>The input string.</p>
3601
     *
3602
     * @psalm-pure
3603
     *
3604
     * @return bool
3605
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3606
     */
3607 2
    public static function is_bom($str): bool
    {
        // Only the keys of the BOM map (the BOM strings) are compared; the values
        // (byte lengths) are irrelevant here.
        //
        // The strict key search replaces a by-reference foreach over the shared
        // static array: that loop never modified anything, yet it bound references
        // into shared state without unsetting them afterwards.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3618
3619
    /**
3620
     * Determine whether the string is considered to be empty.
3621
     *
3622
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3623
     * empty() does not generate a warning if the variable does not exist.
3624
     *
3625
     * @param array|float|int|string $str
3626
     *
3627
     * @psalm-pure
3628
     *
3629
     * @return bool
3630
     *              <p>Whether or not $str is empty().</p>
3631
     */
3632 1
    public static function is_empty($str): bool
    {
        // Plain PHP empty() semantics: "", "0", 0, 0.0, [], null and false are all empty.
        $is_empty = empty($str);

        return $is_empty;
    }
3636
3637
    /**
3638
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3639
     *
3640
     * @param string $str <p>The input string.</p>
3641
     *
3642
     * @psalm-pure
3643
     *
3644
     * @return bool
3645
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3646
     */
3647 13
    public static function is_hexadecimal(string $str): bool
    {
        // Zero or more hexadecimal digits, so an empty string matches as well.
        $pattern = '^[[:xdigit:]]*$';

        // Without mbstring support, defer to the generic pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3655
3656
    /**
3657
     * Check if the string contains any HTML tags.
3658
     *
3659
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3660
     *
3661
     * @param string $str <p>The input string.</p>
3662
     *
3663
     * @psalm-pure
3664
     *
3665
     * @return bool
3666
     *              <p>Whether or not $str contains html elements.</p>
3667
     */
3668 3
    public static function is_html(string $str): bool
    {
        // An empty string cannot contain a tag.
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        // Looks for one opening, closing or self-closing tag (optionally with attributes).
        // Both "no match" (0) and a PCRE failure (false) are reported as "not HTML".
        $match_count = \preg_match(
            "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u",
            $encoded
        );

        return $match_count === 1;
    }
3683
3684
    /**
3685
     * Check if $url is a correct URL.
3686
     *
3687
     * @param string $url
3688
     * @param bool   $disallow_localhost
3689
     *
3690
     * @psalm-pure
3691
     *
3692
     * @return bool
3693
     */
3694 1
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            $loopback_prefixes = [
                'http://localhost',
                'https://localhost',
                'http://127.0.0.1',
                'https://127.0.0.1',
                'http://::1',
                'https://::1',
                // Inside a URL an IPv6 literal is written in square brackets (RFC 3986).
                // Without these two entries "http://[::1]/" slipped through this check
                // and was then accepted by the FILTER_VALIDATE_URL fallback below.
                'http://[::1]',
                'https://[::1]',
            ];

            if (self::str_istarts_with_any($url, $loopback_prefixes)) {
                return false;
            }

            // Also reject anything containing ".localhost" after the scheme.
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Everything else is left to PHP's own URL validation.
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3735
3736
    /**
3737
     * Try to check if "$str" is a JSON-string.
3738
     *
3739
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
3740
     *
3741
     * @param string $str                                    <p>The input string.</p>
3742
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
3743
     *                                                       results.</p>
3744
     *
3745
     * @return bool
3746
     *              <p>Whether or not the $str is in JSON format.</p>
3747
     */
3748 42
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        // An empty string is never valid JSON here.
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only acceptable when the input itself spells "null" (in any letter case).
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        // Optionally reject everything that did not decode to an array or an object.
        $is_container = \is_object($decoded) || \is_array($decoded);
        if ($only_array_or_object_results_are_valid && !$is_container) {
            return false;
        }

        // Finally, the decoder must not have reported an error.
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3775
3776
    /**
3777
     * @param string $str <p>The input string.</p>
3778
     *
3779
     * @psalm-pure
3780
     *
3781
     * @return bool
3782
     *              <p>Whether or not $str contains only lowercase chars.</p>
3783
     */
3784 8
    public static function is_lowercase(string $str): bool
    {
        // Zero or more lowercase characters, so an empty string matches as well.
        // mbstring is used when available, otherwise the generic pattern matcher.
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match('^[[:lower:]]*$', $str)
            : self::str_matches_pattern($str, '^[[:lower:]]*$');
    }
3792
3793
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The input is probed with unserialize(); object instantiation is disabled for
     * that probe, so no class code of the application is executed while checking.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized form of false, which is indistinguishable from
        // the failure return value of unserialize() and must be handled explicitly.
        if ($str === 'b:0;') {
            return true;
        }

        // An empty options array keeps the default "allowed_classes => true", which
        // would instantiate arbitrary classes from untrusted input. We only need to
        // know whether the payload parses, so objects are never materialised.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3815
3816
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * The check uses the POSIX class "[[:upper:]]"; an empty string therefore matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $uppercase_only = '^[[:upper:]]*$';

        // Without mbstring we delegate to the generic pattern helper.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $uppercase_only);
        }

        return \mb_ereg_match($uppercase_only, $str);
    }
3835
3836
    /**
     * Check if the string is UTF-16.
     *
     * The input is converted UTF-16(LE|BE) -> UTF-8 -> UTF-16 -> UTF-8; a candidate
     * byte order only scores when that round trip is stable. The byte order with
     * the higher score wins, a tie means "not UTF-16".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>Return false right away when
     *                                          is_binary($str, true) does not report the input as binary.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // --- little endian candidate ---
        $maybe_utf16le = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
            if ($test3 === $test) {
                /**
                 * @psalm-suppress RedundantCondition
                 */
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }

                // Only the keys are needed. Iterating the temporary result by value
                // avoids taking a reference to a function return value.
                // NOTE(review): the key is searched among the *values* of $str_chars;
                // presumably those are occurrence counts - confirm this comparison
                // against count_chars() is really intended.
                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf16le;
                    }
                }
            }
        }

        // --- big endian candidate ---
        $maybe_utf16be = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
            if ($test3 === $test) {
                // Still empty when the little endian pass did not fill it.
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }

                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf16be;
                    }
                }
            }
        }

        // Equal scores (including 0:0) are treated as "not UTF-16".
        if ($maybe_utf16be !== $maybe_utf16le) {
            if ($maybe_utf16le > $maybe_utf16be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
3929
3930
    /**
     * Check if the string is UTF-32.
     *
     * The input is converted UTF-32(LE|BE) -> UTF-8 -> UTF-32 -> UTF-8; a candidate
     * byte order only scores when that round trip is stable. The byte order with
     * the higher score wins, a tie means "not UTF-32".
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary [optional] <p>Return false right away when
     *                                          is_binary($str, true) does not report the input as binary.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // --- little endian candidate ---
        $maybe_utf32le = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32LE');
            if ($test3 === $test) {
                /**
                 * @psalm-suppress RedundantCondition
                 */
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }

                // Only the keys are needed. Iterating the temporary result by value
                // avoids taking a reference to a function return value.
                // NOTE(review): the key is searched among the *values* of $str_chars;
                // presumably those are occurrence counts - confirm this comparison
                // against count_chars() is really intended.
                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf32le;
                    }
                }
            }
        }

        // --- big endian candidate ---
        $maybe_utf32be = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32BE');
            if ($test3 === $test) {
                // Still empty when the little endian pass did not fill it.
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }

                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf32be;
                    }
                }
            }
        }

        // Equal scores (including 0:0) are treated as "not UTF-32".
        if ($maybe_utf32be !== $maybe_utf32le) {
            if ($maybe_utf32le > $maybe_utf32be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
4023
4024
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * Arrays are checked element by element (recursively); the first failing
     * element makes the whole check fail. An empty array yields true.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // Read-only traversal: iterate by value, so no reference is left
            // dangling when we return early from inside the loop.
            foreach ($str as $v) {
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            return true;
        }

        // Scalars and null are checked via their string representation.
        return self::is_utf8_string((string) $str, $strict);
    }
4054
4055
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Decodes a JSON string
     *
     * The input is passed through self::filter() before it is handed to the
     * native json_decode().
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options. Currently only
     *                        <b>JSON_BIGINT_AS_STRING</b>
     *                        is supported (default is to cast large integers as floats)
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the json support flag reports that ext-json is missing
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        $json_is_missing = self::$SUPPORT['json'] === false;
        if ($json_is_missing) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        return \json_decode($filtered_json, $assoc, $depth, $options);
    }
4108
4109
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Returns the JSON representation of a value.
     *
     * The value is passed through self::filter() before it is handed to the
     * native json_encode().
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the json support flag reports that ext-json is missing
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        $json_is_missing = self::$SUPPORT['json'] === false;
        if ($json_is_missing) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        return \json_encode($filtered_value, $options, $depth);
    }
4161
4162
    /**
     * Checks whether JSON is available on the server.
     *
     * Availability is detected by the presence of the json_decode() function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
4176
4177
    /**
     * Makes string's first char lowercase.
     *
     * The string is split into its first character and the remainder; only the
     * first character is lower-cased, the remainder is appended unchanged.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Any other charset: normalize its name and use the encoding-aware helpers.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $remainder = (string) self::substr($str, 1, null, $encoding);
            $lowered_first = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $lowered_first . $remainder;
        }

        // UTF-8: the native mb_* functions are used for splitting.
        $remainder = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // The plain mb_strtolower() is only enough when no language specific
        // handling and no length preservation was requested.
        $needs_special_handling = $lang !== null || $try_to_keep_the_string_length;

        if ($needs_special_handling) {
            $lowered_first = self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        } else {
            $lowered_first = \mb_strtolower($first_char);
        }

        return $lowered_first . $remainder;
    }
4239
4240
    /**
     * Lowercase for all words in the string.
     *
     * The string is split via str_to_words(); every non-empty part that is not
     * listed in $exceptions gets its first character lower-cased via lcfirst(),
     * and the parts are concatenated again in their original order.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare against the empty string explicitly: a truthiness check would
        // also discard the perfectly valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Read-only traversal, so iterate by value (no dangling reference afterwards).
        foreach ($words as $word) {
            // Skip empty parts only; the word "0" must survive.
            if ((string) $word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            } else {
                // Listed exceptions are copied verbatim.
                $words_str .= $word;
            }
        }

        return $words_str;
    }
4293
4294
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null strips whitespace ("\s"),
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty list would produce the malformed character class "[]";
        // there is nothing to strip in that case.
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // The non-mbstring path additionally escapes the "/" delimiter.
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2.
            $pattern = "^[{$quoted_chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        if ($use_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
4333
4334
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is joined into a single string before the code points are collected.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        // Nothing to compare: signal "no result" with null.
        if ($codepoints === []) {
            return null;
        }

        $highest = (int) \max($codepoints);

        return self::chr($highest);
    }
4360
4361
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte lengths of the given chars; 0 when no sizes could be determined.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        // \max() must not be called with an empty array, hence the explicit fallback.
        return $sizes === [] ? 0 : (int) \max($sizes);
    }
4383
4384
    /**
     * Checks whether mbstring is available on the server.
     *
     * Availability is detected via extension_loaded('mbstring').
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4398
4399
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is joined into a single string before the code points are collected.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
     */
    public static function min($arg)
    {
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        // Nothing to compare: signal "no result" with null.
        if ($codepoints === []) {
            return null;
        }

        $lowest = (int) \min($codepoints);

        return self::chr($lowest);
    }
4426
4427
    /**
4428
     * Normalize the encoding-"name" input.
4429
     *
4430
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
4431
     *
4432
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
4433
     * @param mixed $fallback <p>e.g.: UTF-8</p>
4434
     *
4435
     * @psalm-pure
4436
     *
4437
     * @return mixed|string
4438
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)</p>
4439
     *
4440
     * @template TNormalizeEncodingFallback
4441
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
4442
     * @phpstan-return string|TNormalizeEncodingFallback
4443
     */
4444 339
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * Memo of already normalized names (raw input => normalized name).
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // '' and '0' are falsy; '1' / '0' usually come from a bool that was
        // passed without "strict_types", so they are treated as "no encoding".
        if (!$encoding || $encoding === '1') {
            return $fallback;
        }

        // Exact-match fast paths for the most common spellings (never cached).
        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (\in_array($encoding, ['ISO', 'ISO-8859-1'], true)) {
            return 'ISO-8859-1';
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // already a known name -> keep it untouched
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // Alias lookup: upper-case the name and drop everything except [A-Z0-9],
        // so that e.g. "win-1252" and "WIN_1252" both become "WIN1252".
        $encoding_upper = \strtoupper($encoding);
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding_upper);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown aliases fall back to the upper-cased input (not to $fallback)
        $normalized = $aliases[$alias_key] ?? $encoding_upper;

        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4592
4593
    /**
     * Standardize line endings ("\r\n", "\r" and "\n") to the given replacer.
     *
     * The replacement is done in a single pass, so a replacer that itself
     * contains "\r" or "\n" (e.g. "\r\n") is never replaced a second time.
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. If an array is given, its 1st / 2nd / 3rd value replaces
     *                                  "\r\n" / "\r" / "\n"; missing values are treated as an empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if ($str === '') {
            return '';
        }

        if (\is_array($replacer)) {
            // positional mapping, same as "str_replace()" with two arrays
            $replacer = \array_values($replacer);
            $map = [
                "\r\n" => (string) ($replacer[0] ?? ''),
                "\r"   => (string) ($replacer[1] ?? ''),
                "\n"   => (string) ($replacer[2] ?? ''),
            ];
        } else {
            $replacer = (string) $replacer;
            $map = [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ];
        }

        // "strtr()" tries the longest key first and never re-scans replaced text,
        // unlike a sequential "str_replace()" which would turn "\r\n" into "\r\r\n\r\n"
        // when the replacer is "\r\n".
        return \strtr($str, $map);
    }
4609
4610
    /**
     * Replace some special characters commonly produced by MS Word.
     *
     * This is a thin wrapper that delegates to ASCII::normalize_msword().
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4626
4627
    /**
     * Normalize the whitespace of a string.
     *
     * This is a thin wrapper that forwards all arguments to ASCII::normalize_whitespace().
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                          <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
     *                                             bidirectional text chars.</p>
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }
4656
4657
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * Lookup order: per-call-site cache, the pre-built "ord" data table,
     * "IntlChar::ord()" (if available) and finally a manual decode of the
     * first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 for an empty string</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * Memo of already resolved code points, keyed by "<char>_<encoding>".
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $code = \IntlChar::ord($chr);
            // "IntlChar::ord()" returns int|null: test for null explicitly,
            // so that a valid code point of 0 is not confused with a failure
            if ($code !== null) {
                return $CHAR_CACHE[$cache_key] = $code;
            }
        }

        //
        // fallback via vanilla php
        //

        // NOTE(review): this fallback does not validate continuation bytes, so a
        // malformed / truncated sequence yields an arbitrary value, not 0.
        /** @var false|int[] $bytes - "unpack": 1-based list of the first (up to four) byte values */
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $code = $bytes ? $bytes[1] : 0;

        // 4-byte sequence
        if ($code >= 0xF0 && isset($bytes[4])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        }

        // 3-byte sequence
        if ($code >= 0xE0 && isset($bytes[3])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        }

        // 2-byte sequence
        if ($code >= 0xC0 && isset($bytes[2])) {
            return $CHAR_CACHE[$cache_key] = ((($code - 0xC0) << 6) + $bytes[2] - 0x80);
        }

        // single byte (or empty input => 0)
        return $CHAR_CACHE[$cache_key] = $code;
    }
4742
4743
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        $input = $clean_utf8 ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($input, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($input, $result);

        // success means: the call did not fail AND something was parsed
        return $parsed !== false && $result !== [];
    }
4784
4785
    /**
     * Checks if the "/u" pattern modifier is available, which enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "/u" modifier against an empty
     * subject and reports whether that call succeeded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
4801
4802
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is true but "ext-ctype" is not available
     *
     * @return string[]
     *                  <p>The characters of the range, or an empty array if a boundary is empty or resolves to code point 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // ctype functions interpret an int argument as an ASCII code (and this is
        // deprecated since PHP 8.1), so always test the string representation
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        $is_digit = false;
        $is_xdigit = false;

        // resolve the start code point; both boundaries must be of the same
        // kind (decimal / hexadecimal) to be read as numbers via ctype
        if ($use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string)) {
            $is_digit = true;
            $start = (int) $var1;
        } elseif ($use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        // resolve the end code point the same way
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4895
4896
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding round = to_utf8 -> html_entity_decode -> rawurldecode.
        // It always runs once; with $multi_decode it repeats until the
        // string no longer changes.
        do {
            $before_round = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_round !== $str);

        return self::fix_simple_utf8($str);
    }
4956
4957
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The final expression is built as: delimiter + pattern + delimiter + "u" + options.
     * The pattern is used as given, it is not escaped for the delimiter.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the option set "msr" is mapped to "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty (falsy) delimiter
        $wrap = $delimiter ?: '/';

        $regex = $wrap . $pattern . $wrap . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4992
4993
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table is checked once, in table order, against the
     * (possibly already shortened) start of the string.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);

        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, $bom_string, $bom_byte_length) !== 0) {
                continue;
            }

            /** @var false|string $without_bom - needed for PhpStan (stubs error) */
            $without_bom = \substr($str, $bom_byte_length, $remaining_bytes);
            if ($without_bom === false) {
                return '';
            }

            $remaining_bytes -= $bom_byte_length;
            $str = (string) $without_bom;
        }

        return $str;
    }
5028
5029
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what (e.g. "abab" for "ab") is collapsed
     * into a single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_array($what)) {
            foreach ($what as $item) {
                // Replace with the captured group ("$1" == one literal occurrence)
                // instead of $item itself: $item may contain "$0", "$1" or "\1",
                // which "preg_replace()" would interpret as back-references.
                $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
            }
        }

        return $str;
    }
5059
5060
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
     *                               should not be stripped. Default: '' (an empty string)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $without_tags = \strip_tags($str, $allowable_tags);

        return $without_tags;
    }
5077
5078
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced only once.
     * Only real "<br>" tags (e.g. "<br>", "<br/>", "<br />", "<BR clear="all">")
     * are removed, other tags starting with "br" (e.g. "<brand>") are kept.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" must be the first alternative so that it is consumed as one break;
        // "\b" (regex word boundary) stops "<br" from matching e.g. "<brand>";
        // modifiers: i = case-insensitive, s = dot matches newline, U = ungreedy
        return (string) \preg_replace("#\r\n|\r|\n|<br\\b.*/?>#isU", $replacement, $str);
    }
5093
5094
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * This is a thin wrapper that forwards all arguments to ASCII::remove_invisible_characters().
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        $visible_only = ASCII::remove_invisible_characters(
            $str,
            $url_encoded,
            $replacement,
            $keep_basic_control_characters
        );

        return $visible_only;
    }
5131
5132
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix check is byte-wise; the cut itself is done character-wise in
     * the given encoding. An empty $substring leaves the string untouched.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare against '' explicitly: a truthiness check would wrongly
        // skip the valid prefix "0"
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5173
5174
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix check is byte-wise; the cut itself is done character-wise in
     * the given encoding. An empty $substring leaves the string untouched.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare against '' explicitly: a truthiness check would wrongly
        // skip the valid suffix "0"
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5212
5213
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Case-sensitive replacement uses the native "str_replace()", the
     * case-insensitive one is delegated to UTF8::str_ireplace().
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5238
5239
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * Case-sensitive replacement uses the native "str_replace()", the
     * case-insensitive one is delegated to UTF8::str_ireplace().
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5264
5265
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * Invalid byte sequences are substituted by re-encoding the string from
     * UTF-8 to UTF-8 while the "mb_substitute_character" setting is temporarily
     * switched to the replacement (the previous setting is always restored).
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if ($replacement_char === '') {
                // "none" => invalid sequences are dropped
                $replacement_char_helper = 'none';
            } else {
                // "mb_substitute_character()" expects a Unicode code point; the
                // byte-wise "\ord()" would only give the first byte of a
                // multibyte replacement (e.g. 0xE2 for "€")
                $replacement_char_helper = self::ord($replacement_char);
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();

            try {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
                @\mb_substitute_character($replacement_char_helper);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // restore the global setting, even if the conversion throws
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD (�)
        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
5324
5325
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped, whitespace is stripped if null.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // the fallback pattern is quoted with the "/" delimiter, the "mb_ereg" pattern without one
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $has_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');

            // plain concatenation: the "${var}" interpolation syntax is deprecated since PHP 8.2
            $pattern = '[' . $quoted_chars . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        if ($has_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5365
5366
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the
     * internal support table and optionally echoes it.
     *
     * @param bool $useEcho <p>Echo the generated markup in addition to returning it.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The generated markup.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $rows = [];
        foreach (self::$SUPPORT as $feature => $state) {
            $rows[] = $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }

        $markup = '<pre>' . \implode('', $rows) . '</pre>';

        if ($useEcho) {
            echo $markup;
        }

        return $markup;
    }
5392
5393
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched when the caller asked for it
        $return_unchanged = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($return_unchanged) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5426
5427
    /**
     * Replaces every run of $tab_length spaces with a single tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that form one tab; values below 1 leave the string untouched.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with spaces converted to tabs.</p>
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // a tab cannot consist of zero or a negative number of spaces;
        // str_repeat() would throw a ValueError for negative values on PHP 8
        if ($tab_length < 1) {
            return $str;
        }

        return \str_replace(\str_repeat(' ', $tab_length), "\t", $str);
    }
5447
5448
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first character, then drop leading dashes / underscores
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the native "mb_" functions are only used if no language specific handling was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-cases one matched chunk, either natively or via the class helper.
         *
         * @param string $chunk
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($chunk);
            }

            return \mb_strtoupper($chunk, $encoding);
        };

        // remove the separators and capitalize the character that follows them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                if (!isset($match[1])) {
                    return '';
                }

                return $to_upper($match[1], false);
            },
            $str
        );

        // capitalize the character that follows a run of digits
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5541
5542
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the helper is applied once
     * with the space and once with the dash as word delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $space_separated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_separated, '-');
    }
5563
5564
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        // older PHP versions do not ship the native function yet
        if (\PHP_VERSION_ID < 80000) {
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
5594
5595
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty haystack, an empty list of needles or an empty needle always
     * results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains all $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare against the empty string explicitly, so that "0" stays a valid needle
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            // run exactly one search per needle, matching the requested mode
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5632
5633
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped; an empty haystack or an empty list of needles
     * always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains any of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // compare against the empty string explicitly, so that "0" stays a valid needle
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5676
5677
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is a shortcut for "str_delimit()" with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dashed = self::str_delimit($str, '-', $encoding);

        return $dashed;
    }
5693
5694
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the regex engine is the only difference between the two code paths
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: mark every inner uppercase letter with a leading dash
        if ($has_mbstring) {
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lower-case, natively if no special handling was requested
        $native_lowercase = $lang === null && !$try_to_keep_the_string_length && $encoding === 'UTF-8';
        if ($native_lowercase) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: collapse dashes, underscores and whitespace into the delimiter
        if ($has_mbstring) {
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5745
5746
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($input, true)) {
            $utf32_names = [1 => 'UTF-32LE', 2 => 'UTF-32BE'];
            $utf32_variant = self::is_utf32($input, false);
            if ($utf32_variant === 1 || $utf32_variant === 2) {
                return $utf32_names[$utf32_variant];
            }

            $utf16_names = [1 => 'UTF-16LE', 2 => 'UTF-16BE'];
            $utf16_variant = self::is_utf16($input, false);
            if ($utf16_variant === 1 || $utf16_variant === 2) {
                return $utf16_names[$utf16_variant];
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($input)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($input)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        $candidate_encodings = [
            'ISO-8859-1',
            'ISO-8859-2',
            'ISO-8859-3',
            'ISO-8859-4',
            'ISO-8859-5',
            'ISO-8859-6',
            'ISO-8859-7',
            'ISO-8859-8',
            'ISO-8859-9',
            'ISO-8859-10',
            'ISO-8859-13',
            'ISO-8859-14',
            'ISO-8859-15',
            'ISO-8859-16',
            'WINDOWS-1251',
            'WINDOWS-1252',
            'WINDOWS-1254',
            'CP932',
            'CP936',
            'CP950',
            'CP866',
            'CP850',
            'CP51932',
            'CP50220',
            'CP50221',
            'CP50222',
            'ISO-2022-JP',
            'ISO-2022-KR',
            'JIS',
            'JIS-ms',
            'EUC-CN',
            'EUC-JP',
        ];

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $candidate_encodings, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // an encoding is accepted if a round trip through iconv() leaves the input unchanged
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($round_trip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5876
5877
    /**
     * Check if the string ends with the given substring.
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // byte-wise comparison of the tail, the native function is not available yet
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
5909
5910
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty substring matches every string
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $substring) {
            $substring = (string) $substring;

            // "-strlen('')" is 0 and would select the whole string instead of an
            // empty tail, so the empty substring is handled explicitly
            if ($substring === '') {
                return true;
            }

            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
5937
5938
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // nothing to do if the (non-empty) prefix is already in place
        $already_prefixed = $substring !== ''
                            && \strncmp($str, $substring, \strlen($substring)) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
5961
5962
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // only a non-empty string that already carries the non-empty suffix is left untouched
        $already_suffixed = $str !== ''
                            && $substring !== ''
                            && \substr($str, -\strlen($substring)) === $substring;

        if ($already_suffixed) {
            return $str;
        }

        return $str . $substring;
    }
5986
5987
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: "_id" must be removed before the remaining underscores become spaces
        $without_id = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($spaced));
    }
6013
6014
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // take as many bytes from the end as the needle has and compare them case-insensitive
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6041
6042
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // an empty list never enters the loop and therefore yields false
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6069
6070
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The string is returned unchanged if $index is greater than its length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // other encodings go through the class helpers with a normalized encoding name
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // UTF-8 is handled by the native "mb_" functions directly
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        /** @noinspection UnnecessaryCastingInspection */
        $head = (string) \mb_substr($str, 0, $index);
        /** @noinspection UnnecessaryCastingInspection */
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
6111
6112
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements.</p>
     *
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // turn every search term into a case-insensitive, unicode-aware regex
        $patterns = \array_map(
            static function ($term): string {
                $term = (string) $term;

                if ($term === '') {
                    // a pattern that can never match, so an empty search term replaces nothing
                    return '/^(?<=.)$/';
                }

                return '/' . \preg_quote($term, '/') . '/ui';
            },
            (array) $search
        );

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        $replacement = $replacement ?? '';
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        $subject = $subject ?? '';

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $result
         */
        $result = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $result;
    }
6179
6180
    /**
     * Replaces $search at the beginning of the string with $replacement, ignoring case.
     *
     * EXAMPLE: <code>UTF8::str_ireplace_beginning('ΚόσμεEnd', 'κόσμε', 'Start'); // 'StartEnd'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The string after the replacement, or $str unchanged when it does not
     *                start with $search. An empty $search appends $replacement to $str.
     *                </p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Established behaviour: an empty needle appends the replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            return '';
        }

        // Fast path: byte-wise comparison that folds ASCII letters only.
        $search_bytes = \strlen($search);
        if (\strncasecmp($str, $search, $search_bytes) === 0) {
            return $replacement . \substr($str, $search_bytes);
        }

        // Unicode path: strncasecmp() cannot fold non-ASCII letters, so compare the
        // lower-cased prefix of the same character length instead.
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        $prefix = (string) \mb_substr($str, 0, $search_length, 'UTF-8');
        if (
            \mb_check_encoding($search, 'UTF-8')
            &&
            \mb_check_encoding($prefix, 'UTF-8')
            &&
            \mb_strtolower($prefix, 'UTF-8') === \mb_strtolower($search, 'UTF-8')
        ) {
            // $prefix is valid UTF-8, so its byte length marks where the rest begins.
            return $replacement . \substr($str, \strlen($prefix));
        }

        return $str;
    }
6215
6216
    /**
     * Replaces $search at the end of the string with $replacement, ignoring case.
     *
     * EXAMPLE: <code>UTF8::str_ireplace_ending('StartΚόσμε', 'κόσμε', 'End'); // 'StartEnd'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The string after the replacement, or $str unchanged when it does not
     *                end with $search. An empty $search appends $replacement to $str.
     *                </p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            return '';
        }

        $search_bytes = \strlen($search);
        $str_bytes = \strlen($str);

        // Fast path: byte-wise comparison that folds ASCII letters only. The length
        // guard avoids out-of-range negative offsets when $search is longer than $str.
        if (
            $search_bytes <= $str_bytes
            &&
            \strcasecmp(\substr($str, -$search_bytes), $search) === 0
        ) {
            return \substr($str, 0, $str_bytes - $search_bytes) . $replacement;
        }

        // Unicode path: compare the lower-cased suffix of the same character length.
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        $str_length = (int) \mb_strlen($str, 'UTF-8');
        if ($search_length > $str_length) {
            return $str;
        }

        $suffix = (string) \mb_substr($str, $str_length - $search_length, $search_length, 'UTF-8');
        if (
            \mb_check_encoding($search, 'UTF-8')
            &&
            \mb_check_encoding($suffix, 'UTF-8')
            &&
            \mb_strtolower($suffix, 'UTF-8') === \mb_strtolower($search, 'UTF-8')
        ) {
            // $suffix is valid UTF-8, so its byte length tells how much to cut off.
            return \substr($str, 0, $str_bytes - \strlen($suffix)) . $replacement;
        }

        return $str;
    }
6250
6251
    /**
     * Case-insensitive check whether $haystack begins with $needle.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for; an empty needle always matches.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>True when self::stripos() reports $needle at position 0.</p>
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // The empty needle counts as a prefix of everything, even of an empty haystack.
        if ($needle === '') {
            return true;
        }

        // A non-empty needle cannot be found in an empty haystack.
        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
6278
6279
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              Whether or not $str starts with at least one of the $substrings;
     *              always false for an empty $str or an empty list.
     *              </p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: nothing is written back, and a by-reference loop would
        // force a copy of the array and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6310
6311
    /**
     * Gets the substring after the first case-insensitive occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                Everything behind the separator, or an empty string when $str or
     *                $separator is empty or the separator is not found.
     *                </p>
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        // Search with the same encoding that is used for cutting below, otherwise
        // the character offset and the substring would be measured differently.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6350
6351
    /**
     * Gets the substring after the last case-insensitive occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                Everything behind the last separator, or an empty string when $str or
     *                $separator is empty or the separator is not found.
     *                </p>
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::strripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        // Search with the same encoding that is used for cutting below, otherwise
        // the character offset and the substring would be measured differently.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6390
6391
    /**
     * Gets the substring before the first case-insensitive occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                Everything in front of the separator, or an empty string when $str or
     *                $separator is empty or the separator is not found.
     *                </p>
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr($str, 0, $offset);
        }

        // Search with the same encoding that is used for cutting below, otherwise
        // the character offset and the substring would be measured differently.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6422
6423
    /**
     * Gets the substring before the last case-insensitive occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                Everything in front of the last separator, or an empty string when $str
     *                or $separator is empty or the separator is not found.
     *                </p>
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // UTF-8 goes straight to mbstring, every other encoding through the class helpers.
        $position = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        return (string) self::substr($str, 0, $position, $encoding);
    }
6459
6460
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * The lookup itself is delegated to self::stristr(); a miss is reported as an
     * empty string instead of false.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The matching part, or an empty string when $str or $needle is empty or nothing was found.</p>
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6498
6499
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * The lookup itself is delegated to self::strrichr(); a miss is reported as an
     * empty string instead of false.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The matching part, or an empty string when $str or $needle is empty or nothing was found.</p>
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6537
6538
    /**
     * Returns the last $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end; values <= 0 yield ''.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trailing characters, or an empty string for an empty $str.</p>
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // Any encoding other than the literal 'UTF-8' is normalized and handled by
        // the class helper; the literal 'UTF-8' goes straight to mbstring.
        if ($encoding !== 'UTF-8') {
            $normalized = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized);
        }

        return (string) \mb_substr($str, -$n);
    }
6566
6567
    /**
     * Limit the number of characters in a string.
     *
     * Strings longer than $length are cut so that the kept part plus $str_add_on
     * is $length characters long. If $str_add_on alone is at least $length characters
     * long, nothing of $str is kept and only $str_add_on is returned.
     *
     * EXAMPLE: <code>UTF8::str_limit('fòô bàř fòô', 8, '…'); // 'fòô bàř…'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The limited string; '' for an empty $str or a $length <= 0.</p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never go below zero: a negative length would make mb_substr() cut
            // from the end and return more than $length characters.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same encoding as the string itself.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6606
6607
    /**
     * Limit the number of characters in a string, cutting at a word boundary (space) where possible.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                $str itself when it fits into $length, otherwise the shortened string
     *                followed by $str_add_on; '' for an empty $str or a $length <= 0.
     *                </p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        $last_index = $length - 1;

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The limit falls exactly on a space: cut right in front of it.
            if (\mb_substr($str, $last_index, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $last_index)) . $str_add_on;
            }

            $truncated = \mb_substr($str, 0, $length);

            // Drop the (possibly incomplete) last word: keep what precedes the last space.
            $space_at = \strrpos($truncated, ' ');
            $whole_words = $space_at === false ? '' : \substr($truncated, 0, $space_at);

            // No complete word left: fall back to a hard cut.
            if ($whole_words === '') {
                return ((string) \mb_substr($truncated, 0, $last_index)) . $str_add_on;
            }

            return $whole_words . $str_add_on;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut right in front of it.
        if (self::substr($str, $last_index, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $last_index, $encoding)) . $str_add_on;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '' . $str_add_on;
        }

        // Drop the (possibly incomplete) last word: keep what precedes the last space.
        $space_at = \strrpos($truncated, ' ');
        $whole_words = $space_at === false ? '' : \substr($truncated, 0, $space_at);

        // No complete word left: fall back to a hard cut.
        if ($whole_words === '') {
            return ((string) self::substr($truncated, 0, $last_index, $encoding)) . $str_add_on;
        }

        return $whole_words . $str_add_on;
    }
6673
6674
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * The strings are compared character by character from the start; the
     * comparison is case-sensitive and stops at the first difference.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared prefix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // Only as many characters as the shorter string has can be shared.
            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );

            $position = 0;
            while ($position < $limit) {
                $char = self::substr($str1, $position, 1, $encoding);
                if ($char === false || $char !== self::substr($str2, $position, 1, $encoding)) {
                    break;
                }

                $prefix .= $char;
                ++$position;
            }

            return $prefix;
        }

        // Only as many characters as the shorter string has can be shared.
        $limit = (int) \min(
            \mb_strlen($str1),
            \mb_strlen($str2)
        );

        $position = 0;
        while ($position < $limit) {
            $char = \mb_substr($str1, $position, 1);
            if ($char === false || $char !== \mb_substr($str2, $position, 1)) {
                break;
            }

            $prefix .= $char;
            ++$position;
        }

        return $prefix;
    }
6737
6738
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first in $str1.
     *
     * EXAMPLE: <code>UTF8::str_longest_common_substring('fòôbàř', 'xxòôbàxx'); // 'òôbà'</code>
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // The normalized name decides which substr implementation is used from here on.
        $is_utf8 = $encoding === 'UTF-8';

        // Extract every character once up front instead of inside the nested loops.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[$i] = $is_utf8
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[$j] = $is_utf8
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // 1-based position in $str1 at which that match ends

        // Each cell only depends on the row above, so two rows replace the full table.
        // Row entry $j holds the length of the common run ending at $str1[$i] / $str2[$j].
        $previous_row = \array_fill(0, $other_length + 1, 0);
        for ($i = 1; $i <= $str_length; ++$i) {
            $current_row = [0];
            $str_char = $str_chars[$i - 1];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;

                    // Strictly greater: on ties the earlier match in $str1 wins.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($is_utf8) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6828
6829
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * The strings are compared character by character from the end; the
     * comparison is case-sensitive and stops at the first difference.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared suffix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // Only as many characters as the shorter string has can be shared.
            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );

            // $distance counts characters from the end, starting with the last one.
            $distance = 1;
            while ($distance <= $limit) {
                $char = self::substr($str1, -$distance, 1, $encoding);
                if ($char === false || $char !== self::substr($str2, -$distance, 1, $encoding)) {
                    break;
                }

                $suffix = $char . $suffix;
                ++$distance;
            }

            return $suffix;
        }

        // Only as many characters as the shorter string has can be shared.
        $limit = (int) \min(
            \mb_strlen($str1, $encoding),
            \mb_strlen($str2, $encoding)
        );

        // $distance counts characters from the end, starting with the last one.
        $distance = 1;
        while ($distance <= $limit) {
            $char = \mb_substr($str1, -$distance, 1);
            if ($char === false || $char !== \mb_substr($str2, -$distance, 1)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$distance;
        }

        return $suffix;
    }
6895
6896
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and run with the "u" (UTF-8)
     * modifier, so a literal "/" inside $pattern has to be escaped by the caller.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against, without delimiters.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
6911
6912
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Non-negative offsets are zero-based (valid: 0 .. count - 1);
        // negative offsets count backwards from the end (valid: -1 .. -count).
        return $offset >= 0
            ? $offset < $char_count
            : \abs($offset) <= $char_count;
    }
6937
6938
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Throws an
     * OutOfBoundsException if the index does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the requested encoding, the same one char_at()
        // receives below, so the bounds check and the lookup agree.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6970
6971
    /**
6972
     * Pad a UTF-8 string to a given length with another string.
6973
     *
6974
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
6975
     *
6976
     * @param string     $str        <p>The input string.</p>
6977
     * @param int        $pad_length <p>The length of return string.</p>
6978
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
6979
     * @param int|string $pad_type   [optional] <p>
6980
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
6981
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
6982
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
6983
     *                               </p>
6984
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
6985
     *
6986
     * @psalm-pure
6987
     *
6988
     * @return string
6989
     *                <p>Returns the padded string.</p>
6990
     */
6991 41
    public static function str_pad(
6992
        string $str,
6993
        int $pad_length,
6994
        string $pad_string = ' ',
6995
        $pad_type = \STR_PAD_RIGHT,
6996
        string $encoding = 'UTF-8'
6997
    ): string {
6998 41
        if ($pad_length === 0 || $pad_string === '') {
6999 1
            return $str;
7000
        }
7001
7002 41
        if ($pad_type !== (int) $pad_type) {
7003 13
            if ($pad_type === 'left') {
7004 3
                $pad_type = \STR_PAD_LEFT;
7005 10
            } elseif ($pad_type === 'right') {
7006 6
                $pad_type = \STR_PAD_RIGHT;
7007 4
            } elseif ($pad_type === 'both') {
7008 3
                $pad_type = \STR_PAD_BOTH;
7009
            } else {
7010 1
                throw new \InvalidArgumentException(
7011 1
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
7012
                );
7013
            }
7014
        }
7015
7016 40
        if ($encoding === 'UTF-8') {
7017 25
            $str_length = (int) \mb_strlen($str);
7018
7019 25
            if ($pad_length >= $str_length) {
7020 25
                switch ($pad_type) {
7021
                    case \STR_PAD_LEFT:
7022 8
                        $ps_length = (int) \mb_strlen($pad_string);
7023
7024 8
                        $diff = ($pad_length - $str_length);
7025
7026 8
                        $pre = (string) \mb_substr(
7027 8
                            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7028 8
                            0,
7029 8
                            $diff
7030
                        );
7031 8
                        $post = '';
7032
7033 8
                        break;
7034
7035
                    case \STR_PAD_BOTH:
7036 14
                        $diff = ($pad_length - $str_length);
7037
7038 14
                        $ps_length_left = (int) \floor($diff / 2);
7039
7040 14
                        $ps_length_right = (int) \ceil($diff / 2);
7041
7042 14
                        $pre = (string) \mb_substr(
7043 14
                            \str_repeat($pad_string, $ps_length_left),
7044 14
                            0,
7045 14
                            $ps_length_left
7046
                        );
7047 14
                        $post = (string) \mb_substr(
7048 14
                            \str_repeat($pad_string, $ps_length_right),
7049 14
                            0,
7050 14
                            $ps_length_right
7051
                        );
7052
7053 14
                        break;
7054
7055
                    case \STR_PAD_RIGHT:
7056
                    default:
7057 9
                        $ps_length = (int) \mb_strlen($pad_string);
7058
7059 9
                        $diff = ($pad_length - $str_length);
7060
7061 9
                        $post = (string) \mb_substr(
7062 9
                            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7063 9
                            0,
7064 9
                            $diff
7065
                        );
7066 9
                        $pre = '';
7067
                }
7068
7069 25
                return $pre . $str . $post;
7070
            }
7071
7072 3
            return $str;
7073
        }
7074
7075 15
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
7076
7077 15
        $str_length = (int) self::strlen($str, $encoding);
7078
7079 15
        if ($pad_length >= $str_length) {
7080 14
            switch ($pad_type) {
7081
                case \STR_PAD_LEFT:
7082 5
                    $ps_length = (int) self::strlen($pad_string, $encoding);
7083
7084 5
                    $diff = ($pad_length - $str_length);
7085
7086 5
                    $pre = (string) self::substr(
7087 5
                        \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7088 5
                        0,
7089
                        $diff,
7090
                        $encoding
7091
                    );
7092 5
                    $post = '';
7093
7094 5
                    break;
7095
7096
                case \STR_PAD_BOTH:
7097 3
                    $diff = ($pad_length - $str_length);
7098
7099 3
                    $ps_length_left = (int) \floor($diff / 2);
7100
7101 3
                    $ps_length_right = (int) \ceil($diff / 2);
7102
7103 3
                    $pre = (string) self::substr(
7104 3
                        \str_repeat($pad_string, $ps_length_left),
7105 3
                        0,
7106
                        $ps_length_left,
7107
                        $encoding
7108
                    );
7109 3
                    $post = (string) self::substr(
7110 3
                        \str_repeat($pad_string, $ps_length_right),
7111 3
                        0,
7112
                        $ps_length_right,
7113
                        $encoding
7114
                    );
7115
7116 3
                    break;
7117
7118
                case \STR_PAD_RIGHT:
7119
                default:
7120 6
                    $ps_length = (int) self::strlen($pad_string, $encoding);
7121
7122 6
                    $diff = ($pad_length - $str_length);
7123
7124 6
                    $post = (string) self::substr(
7125 6
                        \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
7126 6
                        0,
7127
                        $diff,
7128
                        $encoding
7129
                    );
7130 6
                    $pre = '';
7131
            }
7132
7133 14
            return $pre . $str . $post;
7134
        }
7135
7136 1
        return $str;
7137
    }
7138
7139
    /**
7140
     * Returns a new string of a given length such that both sides of the
7141
     * string are padded. Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
7142
     *
7143
     * @param string $str
7144
     * @param int    $length   <p>Desired string length after padding.</p>
7145
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
7146
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7147
     *
7148
     * @psalm-pure
7149
     *
7150
     * @return string
7151
     *                <p>The string with padding applied.</p>
7152
     */
7153 11
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Thin alias: all the work happens in str_pad(), called with the "both sides" pad type.
        $pad_type = \STR_PAD_BOTH;

        return self::str_pad($str, $length, $pad_str, $pad_type, $encoding);
    }
7167
7168
    /**
7169
     * Returns a new string of a given length such that the beginning of the
7170
     * string is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
7171
     *
7172
     * @param string $str
7173
     * @param int    $length   <p>Desired string length after padding.</p>
7174
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
7175
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7176
     *
7177
     * @psalm-pure
7178
     *
7179
     * @return string
7180
     *                <p>The string with left padding.</p>
7181
     */
7182 7
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Thin alias: all the work happens in str_pad(), called with the "left" pad type.
        $pad_type = \STR_PAD_LEFT;

        return self::str_pad($str, $length, $pad_str, $pad_type, $encoding);
    }
7196
7197
    /**
7198
     * Returns a new string of a given length such that the end of the string
7199
     * is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
7200
     *
7201
     * @param string $str
7202
     * @param int    $length   <p>Desired string length after padding.</p>
7203
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
7204
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7205
     *
7206
     * @psalm-pure
7207
     *
7208
     * @return string
7209
     *                <p>The string with right padding.</p>
7210
     */
7211 7
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Thin alias: all the work happens in str_pad(), called with the "right" pad type.
        $pad_type = \STR_PAD_RIGHT;

        return self::str_pad($str, $length, $pad_str, $pad_type, $encoding);
    }
7225
7226
    /**
7227
     * Repeat a string.
7228
     *
7229
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
7230
     *
7231
     * @param string $str        <p>
7232
     *                           The string to be repeated.
7233
     *                           </p>
7234
     * @param int    $multiplier <p>
7235
     *                           Number of times the input string should be
7236
     *                           repeated.
7237
     *                           </p>
7238
     *                           <p>
7239
     *                           multiplier has to be greater than or equal to 0.
7240
     *                           If the multiplier is set to 0, the function
7241
     *                           will return an empty string.
7242
     *                           </p>
7243
     *
7244
     * @psalm-pure
7245
     *
7246
     * @return string
7247
     *                <p>The repeated string.</p>
7248
     */
7249 9
    public static function str_repeat(string $str, int $multiplier): string
    {
        // The input is passed through self::filter() first; the native
        // str_repeat() then repeats the filtered result.
        $filtered = self::filter($str);

        return \str_repeat($filtered, $multiplier);
    }
7255
7256
    /**
7257
     * INFO: This is only a wrapper for "str_replace()"  -> the original function is already UTF-8 safe.
7258
     *
7259
     * Replace all occurrences of the search string with the replacement string
7260
     *
7261
     * @see http://php.net/manual/en/function.str-replace.php
7262
     *
7263
     * @param string|string[] $search  <p>
7264
     *                                 The value being searched for, otherwise known as the needle.
7265
     *                                 An array may be used to designate multiple needles.
7266
     *                                 </p>
7267
     * @param string|string[] $replace <p>
7268
     *                                 The replacement value that replaces found search
7269
     *                                 values. An array may be used to designate multiple replacements.
7270
     *                                 </p>
7271
     * @param string|string[] $subject <p>
7272
     *                                 The string or array of strings being searched and replaced on,
7273
     *                                 otherwise known as the haystack.
7274
     *                                 </p>
7275
     *                                 <p>
7276
     *                                 If subject is an array, then the search and
7277
     *                                 replace is performed with every entry of
7278
     *                                 subject, and the return value is an array as
7279
     *                                 well.
7280
     *                                 </p>
7281
     * @param int|null        $count   [optional] <p>
7282
     *                                 If passed, this will hold the number of matched and replaced needles.
7283
     *                                 </p>
7284
     *
7285
     * @psalm-pure
7286
     *
7287
     * @return string|string[]
7288
     *                         <p>This function returns a string or an array with the replaced values.</p>
7289
     *
7290
     * @template TStrReplaceSubject
7291
     * @phpstan-param TStrReplaceSubject $subject
7292
     * @phpstan-return TStrReplaceSubject
7293
     *
7294
     * @deprecated please use \str_replace() instead
7295
     */
7296 12
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // Pure pass-through to the native function; $count is forwarded by
        // reference so the caller receives the number of replacements.
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7315
7316
    /**
7317
     * Replaces $search from the beginning of string with $replacement.
7318
     *
7319
     * @param string $str         <p>The input string.</p>
7320
     * @param string $search      <p>The string to search for.</p>
7321
     * @param string $replacement <p>The replacement.</p>
7322
     *
7323
     * @psalm-pure
7324
     *
7325
     * @return string
7326
     *                <p>A string after the replacements.</p>
7327
     */
7328 17
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty search term never matches a prefix; in that case the
        // replacement is appended to the input (for an empty input this is
        // simply the replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_length = \strlen($search);

        // Byte-wise prefix comparison: leave the input untouched when it does
        // not start with the search term.
        if (\strncmp($str, $search, $prefix_length) !== 0) {
            return $str;
        }

        // Swap the matched prefix for the replacement and keep the remainder.
        return $replacement . \substr($str, $prefix_length);
    }
7354
7355
    /**
7356
     * Replaces $search from the ending of string with $replacement.
7357
     *
7358
     * @param string $str         <p>The input string.</p>
7359
     * @param string $search      <p>The string to search for.</p>
7360
     * @param string $replacement <p>The replacement.</p>
7361
     *
7362
     * @psalm-pure
7363
     *
7364
     * @return string
7365
     *                <p>A string after the replacements.</p>
7366
     */
7367 17
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // Replaces $search with $replacement only when $str ends with $search
        // (byte-wise comparison); otherwise $str is returned unchanged.

        // An empty search term never matches a suffix; the replacement is
        // appended instead (for an empty input this is just the replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // A search term longer than the input cannot be its suffix. This guard
        // also avoids handing a negative, out-of-range offset to a string
        // function, which raises a ValueError on PHP 8 (warning on PHP 7).
        if ($search_length > \strlen($str)) {
            return $str;
        }

        // Compare the last $search_length bytes against the search term.
        if (\substr($str, -$search_length) === $search) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
7392
7393
    /**
7394
     * Replace the first "$search"-term with the "$replace"-term.
7395
     *
7396
     * @param string $search
7397
     * @param string $replace
7398
     * @param string $subject
7399
     *
7400
     * @psalm-pure
7401
     *
7402
     * @return string
7403
     *
7404
     * @psalm-suppress InvalidReturnType
7405
     */
7406 2
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        // Locate the first occurrence; without a match the subject is returned as-is.
        $first_pos = self::strpos($subject, $search);
        if ($first_pos === false) {
            return $subject;
        }

        // Replace exactly the span covered by the search term at that position.
        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, $search_length);
    }
7427
7428
    /**
7429
     * Replace the last "$search"-term with the "$replace"-term.
7430
     *
7431
     * @param string $search
7432
     * @param string $replace
7433
     * @param string $subject
7434
     *
7435
     * @psalm-pure
7436
     *
7437
     * @return string
7438
     *
7439
     * @psalm-suppress InvalidReturnType
7440
     */
7441 2
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        // Locate the last occurrence; without a match the subject is returned as-is.
        $last_pos = self::strrpos($subject, $search);
        if ($last_pos === false) {
            return $subject;
        }

        // Replace exactly the span covered by the search term at that position.
        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, $search_length);
    }
7461
7462
    /**
7463
     * Shuffles all the characters in the string.
7464
     *
7465
     * INFO: uses random algorithm which is weak for cryptography purposes
7466
     *
7467
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
7468
     *
7469
     * @param string $str      <p>The input string</p>
7470
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7471
     *
7472
     * @return string
7473
     *                <p>The shuffled string.</p>
7474
     */
7475 5
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // The literal 'UTF-8' takes the native mb_* path; every other encoding
        // name is normalized and handled by the class' own strlen()/substr().
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $char_count = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $char_count = (int) self::strlen($str, $encoding);
        }

        // Shuffle the character positions rather than the bytes, so multi-byte
        // characters stay intact.
        $positions = \range(0, $char_count - 1);
        \shuffle($positions);

        $shuffled = '';
        foreach ($positions as $position) {
            $char = $is_utf8
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            // Skip positions for which no character could be extracted.
            if ($char !== false) {
                $shuffled .= $char;
            }
        }

        return $shuffled;
    }
7509
7510
    /**
7511
     * Returns the substring beginning at $start, and up to, but not including
7512
     * the index specified by $end. If $end is omitted, the function extracts
7513
     * the remaining string. If $end is negative, it is computed from the end
7514
     * of the string.
7515
     *
7516
     * @param string   $str
7517
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
7518
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
7519
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7520
     *
7521
     * @psalm-pure
7522
     *
7523
     * @return false|string
7524
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
7525
     *                      characters long, <b>FALSE</b> will be returned.</p>
7526
     */
7527 18
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // The literal 'UTF-8' takes the native mb_* path; every other encoding
        // name is normalized and handled by the class' own strlen()/substr().
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // A non-negative end index at or before the start selects nothing.
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            $total = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            // No end: take up to the full length. Negative end: count it from
            // the end of the string.
            $length = $end === null ? $total : $total + $end - $start;
        } else {
            $length = $end - $start;
        }

        return $is_utf8
            ? \mb_substr($str, $start, $length)
            : self::substr($str, $start, $length, $encoding);
    }
7561
7562
    /**
7563
     * Convert a string to e.g.: "snake_case"
7564
     *
7565
     * @param string $str
7566
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7567
     *
7568
     * @psalm-pure
7569
     *
7570
     * @return string
7571
     *                <p>A string in snake_case.</p>
7572
     */
7573 22
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Normalize whitespace first, then treat every dash as an underscore.
        $str = \str_replace('-', '_', self::normalize_whitespace($str));

        // 'UTF-8' and 'CP850' are used as given; any other name is normalized.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // Match one number or one uppercase letter. Inside a character
            // class "|" is a literal pipe, not an alternation, so it must not
            // appear here - otherwise every "|" in the input gets a "_" prefix.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];

                // A match that survives an int round-trip is a plain digit:
                // it is wrapped in underscores on both sides.
                if ((string) (int) $match === $match) {
                    return '_' . $match . '_';
                }

                // Everything else is lowercased and prefixed with "_".
                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',        // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',           // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7631
7632
    /**
7633
     * Sort all characters according to code points.
7634
     *
7635
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
7636
     *
7637
     * @param string $str    <p>A UTF-8 string.</p>
7638
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
7639
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
7640
     *
7641
     * @psalm-pure
7642
     *
7643
     * @return string
7644
     *                <p>A string of sorted characters.</p>
7645
     */
7646 2
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        // Work on the code points produced by self::codepoints().
        $code_points = self::codepoints($str);

        if ($unique) {
            // Flipping twice drops repeated values (a value can be a key only once).
            $code_points = \array_flip(\array_flip($code_points));
        }

        // Sort by value, ascending unless $desc is requested.
        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        // Turn the sorted code points back into a string.
        return self::string($code_points);
    }
7662
7663
    /**
7664
     * Convert a string to an array of Unicode characters.
7665
     *
7666
     * EXAMPLE: <code>
7667
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
7668
     * </code>
7669
     *
7670
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
7671
     * @param int            $length                  [optional] <p>Max character length of each array
7672
     *                                                element.</p>
7673
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
7674
     *                                                string.</p>
7675
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
7676
     *                                                "mb_substr"</p>
7677
     *
7678
     * @psalm-pure
7679
     *
7680
     * @return string[][]
7681
     *                    <p>An array containing chunks of the input.</p>
7682
     */
7683 1
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split every element with str_split(); array_map() with a single
        // array keeps the original keys.
        /** @var string[][] $chunked */
        $chunked = \array_map(
            static function ($element) use ($length, $clean_utf8, $try_to_use_mb_functions) {
                return self::str_split(
                    $element,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            },
            $input
        );

        return $chunked;
    }
7701
7702
    /**
7703
     * Convert a string to an array of unicode characters.
7704
     *
7705
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
7706
     *
7707
     * @param int|string $input                   <p>The string or int to split into array.</p>
7708
     * @param int        $length                  [optional] <p>Max character length of each array
7709
     *                                            element.</p>
7710
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
7711
     *                                            string.</p>
7712
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
7713
     *                                            "mb_substr"</p>
7714
     *
7715
     * @psalm-pure
7716
     *
7717
     * @return string[]
7718
     *                  <p>An array containing chunks of chars from the input.</p>
7719
     */
7720 96
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // A non-positive chunk length yields nothing.
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /** @psalm-suppress InvalidReturnStatement */
            /** @phpstan-ignore-next-line - old code :/ */
            return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        $text = (string) $input;
        if ($text === '') {
            return [];
        }

        if ($clean_utf8) {
            $text = self::clean($text);
        }

        // Strategy selection: mbstring (if allowed and available), then PCRE
        // with UTF-8 support, then a manual byte-wise scan.
        $chars = [];
        $split_with_pcre = false;
        $split_bytewise = false;

        if ($try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true) {
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $mb_chunks = \mb_str_split($text, $length);
                if ($mb_chunks !== false) {
                    // mb_str_split() already honours $length, so we are done.
                    return $mb_chunks;
                }
            }

            $char_count = \mb_strlen($text);
            if ($char_count <= 127) {
                // Short input: pick the characters one by one.
                for ($offset = 0; $offset < $char_count; ++$offset) {
                    $chars[] = \mb_substr($text, $offset, 1);
                }
            } else {
                $split_with_pcre = true;
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $split_with_pcre = true;
        } else {
            $split_bytewise = true;
        }

        if ($split_with_pcre) {
            // "." with the /u and /s modifiers matches one character at a time,
            // newlines included.
            $matches = [];
            \preg_match_all('/./us', $text, $matches);
            $chars = $matches[0] ?? [];
        }

        if ($split_bytewise) {
            // fallback: decode the UTF-8 lead byte by hand and collect the
            // complete sequence; sequences with a bad continuation byte are
            // dropped.
            $byte_count = \strlen($text);

            for ($pos = 0; $pos < $byte_count; ++$pos) {
                $lead = $text[$pos];

                // 0xxxxxxx: single-byte character
                if (($lead & "\x80") === "\x00") {
                    $chars[] = $lead;

                    continue;
                }

                // 110xxxxx: two-byte sequence
                if (isset($text[$pos + 1]) && ($lead & "\xE0") === "\xC0") {
                    if (($text[$pos + 1] & "\xC0") === "\x80") {
                        $chars[] = \substr($text, $pos, 2);
                        ++$pos;
                    }

                    continue;
                }

                // 1110xxxx: three-byte sequence
                if (isset($text[$pos + 2]) && ($lead & "\xF0") === "\xE0") {
                    if (
                        ($text[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($text[$pos + 2] & "\xC0") === "\x80"
                    ) {
                        $chars[] = \substr($text, $pos, 3);
                        $pos += 2;
                    }

                    continue;
                }

                // 11110xxx: four-byte sequence
                if (isset($text[$pos + 3]) && ($lead & "\xF8") === "\xF0") {
                    if (
                        ($text[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($text[$pos + 2] & "\xC0") === "\x80"
                        &&
                        ($text[$pos + 3] & "\xC0") === "\x80"
                    ) {
                        $chars[] = \substr($text, $pos, 4);
                        $pos += 3;
                    }
                }
            }
        }

        // Group single characters into chunks of $length characters.
        if ($length > 1) {
            return \array_map(
                static function (array $group): string {
                    return \implode('', $group);
                },
                \array_chunk($chars, $length)
            );
        }

        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }
7855
7856
    /**
7857
     * Splits the string with the provided regular expression, returning an
7858
     * array of strings. An optional integer $limit will truncate the
7859
     * results.
7860
     *
7861
     * @param string $str
7862
     * @param string $pattern <p>The regex with which to split the string.</p>
7863
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
7864
     *
7865
     * @psalm-pure
7866
     *
7867
     * @return string[]
7868
     *                  <p>An array of strings.</p>
7869
     */
7870 16
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        // A limit of zero asks for no results at all.
        if ($limit === 0) {
            return [];
        }

        // Without a pattern there is nothing to split on.
        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $parts = \mb_split($pattern, $str);
            if ($parts === false) {
                return [];
            }

            // Negative limit: no truncation.
            if ($limit < 0) {
                return $parts;
            }

            // Positive limit: keep only the first $limit pieces, the rest is dropped.
            return \array_slice($parts, 0, $limit);
        }

        // Fallback without mbstring: ask preg_split() for one piece more than
        // requested, so the unsplit remainder lands in an extra element that
        // can be discarded afterwards.
        // NOTE(review): the pattern is preg_quote()d here, i.e. matched
        // literally, whereas mb_split() above treats it as a regex - confirm
        // whether that difference is intended.
        $preg_limit = $limit > 0 ? $limit + 1 : -1;

        $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $preg_limit);
        if ($parts === false) {
            return [];
        }

        if ($preg_limit > 0 && \count($parts) === $preg_limit) {
            \array_pop($parts);
        }

        return $parts;
    }
7925
7926
    /**
     * Tell whether $haystack begins with $needle (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>true for an empty $needle; false for an empty $haystack with a non-empty $needle.</p>
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // An empty needle is a prefix of every string (including the empty one).
        if ($needle === '') {
            return true;
        }

        // A non-empty needle can never be a prefix of an empty haystack.
        if ($haystack === '') {
            return false;
        }

        // Before PHP 8 there is no native str_starts_with(): compare the leading bytes instead.
        if (\PHP_VERSION_ID < 80000) {
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
7958
7959
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the $substrings. Always false when $str is empty
     *              or when $substrings is an empty array.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: the candidates are only read, so a by-reference loop would merely
        // force a copy of the caller's array and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
7990
7991
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Everything after the first $separator, or an empty string when $str or $separator
     *                is empty or the separator does not occur.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Fast path: plain mbstring calls for the default encoding.
        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class' own substr() wrapper, exactly like the sibling *_separator methods do,
        // instead of handing the caller-supplied encoding name straight to \mb_substr().
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8032
8033
    /**
     * Return the part of $str that follows the last occurrence of $separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string when $str or $separator is empty or the separator is not found.</p>
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Locate the last separator: native mbstring for UTF-8, the class wrapper otherwise.
        $position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, $position + (int) \mb_strlen($separator));
        }

        $start = $position + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $start, null, $encoding);
    }
8077
8078
    /**
     * Return the part of $str that precedes the first occurrence of $separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string when $str or $separator is empty or the separator is not found.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Locate the first separator: native mbstring for UTF-8, the class wrapper otherwise.
        $position = $is_utf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        // Everything from the start of the string up to (not including) the separator.
        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        return (string) self::substr($str, 0, $position, $encoding);
    }
8123
8124
    /**
     * Return the part of $str that precedes the last occurrence of $separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string when $str or $separator is empty or the separator is not found.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Locate the last separator: native mbstring for UTF-8, the class wrapper otherwise.
        $position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        // The encoding name is normalized (falling back to 'UTF-8') before the final cut.
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $position, $normalized_encoding);
    }
8168
8169
    /**
     * Return the portion of $str starting at the first occurrence of $needle (needle included),
     * or - with $before_needle set - the portion in front of that occurrence.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string when $str or $needle is empty or the needle is not found.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mb_strstr() already defaults its third argument to false, so the flag can be passed through directly.
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8215
8216
    /**
     * Return the portion of $str starting at the last occurrence of $needle (needle included),
     * or - with $before_needle set - the portion in front of that occurrence.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string when $str or $needle is empty or the needle is not found.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mb_strrchr() already defaults its third argument to false, so the flag can be passed through directly.
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8262
8263
    /**
     * Wrap $str in $substring, i.e. put $substring in front of it and behind it.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
8278
8279
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that are neither whitespace nor listed in $word_define_chars.
     * The first character of each word is upper-cased and the rest of the word is lower-cased.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Words are compared exactly (case-sensitive).
     *                                                           Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                           whitespace separator === words. Every non-empty
     *                                                           string is honoured, including "0".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // The plain mb_* functions are only used when no language-specific or length-preserving handling is requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Compare against null / '' explicitly: a truthiness check would silently drop the separator string "0".
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // Ignored words are returned untouched.
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8374
8375
    /**
     * Convert a string into an obfuscated string by replacing a share of its characters.
     *
     * The characters to process are picked at random, so two calls with the same input usually differ.
     * Characters listed in $keepChars count towards the processed share but are left unchanged.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('user@example.com', 0.5, '*', ['@', '.']); // e.g. "u**r@*x*m*le.c*m"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the characters to process, e.g. 0.5 for one half. Values that
     *                                would ask for more characters than are available are capped.</p>
     * @param string   $obfuscateChar <p>The replacement character.</p>
     * @param string[] $keepChars     <p>Characters that are never replaced.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscated string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // Temporarily swap pre-existing obfuscate chars for a placeholder; they are restored at the very end.
        // NOTE(review): random_int() is used below, so the @psalm-pure annotation looks questionable.
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);

        // Only chars that differ from the obfuscate char can ever be processed by the loop below.
        $charsEligible = 0;
        foreach ($chars as $char) {
            if ($char !== $obfuscateChar) {
                ++$charsEligible;
            }
        }

        // Cap the target at what is reachable, otherwise the while loop would spin forever
        // (e.g. for $percent > 1).
        $charsMaxChange = \min($charsEligible, \round($charsMax * $percent));
        $charsCounter = 0;
        $charKeyDone = [];

        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // Roughly every second candidate is skipped in each pass to spread the replacements.
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // Kept chars use up a slot but stay readable.
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
8441
8442
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * The string is rewritten in five regex passes: the main word-by-word substitution, two passes that
     * re-capitalize small words at the start / end of the title (or of a sub-phrase), and two passes for
     * small words inside hyphenated compounds.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize. The entries are appended to the built-in
     *                         small-word alternatives without escaping, so they are read as regex fragments.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // Regex alternatives for words that stay lower-case inside a title.
        $small_words = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        // NOTE(review): $ignore entries are merged in as-is (no preg_quote), so regex metacharacters,
        // whitespace, "#" or "~" inside them change the patterns below - confirm this is intended.
        if ($ignore !== []) {
            $small_words = \array_merge($small_words, $ignore);
        }

        $small_words_rx = \implode('|', $small_words);
        // Optional apostrophe followed by lower-case letters (e.g. a trailing "'s").
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // Input without any lower-case character is lower-cased first so the passes below can re-capitalize it.
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * Rebuild one matched token; exactly one of the groups 2-5 decides how it is treated.
             *
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= static::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * Keep the leading context (group 1) and upper-case the first letter of the small word (group 2).
             *
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . static::ucfirst($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * Upper-case the first letter of the trailing small word (group 1).
             *
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return static::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * Upper-case the first letter of the small word that starts the compound (group 1).
             *
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return static::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the inline pattern comment below talks about a look-behind for a hyphen, but the
        // look-behind actually present is for the character "…" - confirm which one is intended.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * Keep the first word and hyphen (group 1) and upper-case the first letter of the small word (group 2).
             *
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . static::ucfirst($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
8632
8633
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of $str are written as one binary number (8 bits per byte) whose leading zero bits
     * are dropped; an input consisting only of zero bits (or an empty input) yields '0'.
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('C*', $str);
        if ($value === false) {
            return false;
        }

        // Convert byte by byte: base_convert() works on floats internally and therefore
        // returns wrong digits once the input is longer than a few bytes.
        $bits = '';
        foreach ($value as $byte) {
            $bits .= \sprintf('%08b', $byte);
        }

        // Same shape as the former base_convert() output: no leading zeros, '0' for an all-zero value.
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8656
8657
    /**
     * Split a string into its lines.
     *
     * The input is split on runs of one or two "\r" / "\n" characters.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // Result for "no usable input": nothing at all, or a single empty line.
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        // Only run the filter helper when at least one of the two filters is active.
        if ($remove_empty_values || $remove_short_values !== null) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }
8696
8697
    /**
     * Convert a string into an array of words.
     *
     * The words are captured as delimiters of the split, so the pieces between them
     * (whitespace, punctuation, empty strings) are part of the result as well.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // Result for "no usable input": nothing at all, or a single empty string.
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        // Character class for "word" characters: letters plus the caller's extra chars.
        $char_list = self::rxClass($char_list, '\pL');

        $pieces = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            return $nothing;
        }

        // No filter requested: return the raw split result.
        if (!$remove_empty_values && $remove_short_values === null) {
            return $pieces;
        }

        $filtered = self::reduce_string_array(
            $pieces,
            $remove_empty_values,
            $remove_short_values
        );

        // Make sure every remaining entry really is a string.
        return \array_map(
            static function ($piece): string {
                return (string) $piece;
            },
            $filtered
        );
    }
8748
8749
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If $substring alone is at least as long as $length, nothing of $str is kept
     * and only $substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough already: nothing to cut, nothing to append.
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the suffix. Never let the remainder drop below zero: a negative
                // length would make mb_substr() count from the end of the string instead.
                $length = \max(0, $length - (int) \mb_strlen($substring));

                /** @noinspection UnnecessaryCastingInspection */
                return (string) \mb_substr($str, 0, $length) . $substring;
            }

            return (string) \mb_substr($str, 0, $length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // Same clamping as in the UTF-8 branch above.
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
8808
8809
    /**
     * Truncates the string to a given length without splitting words. If
     * $substring is provided, and truncating occurs, the string is shortened
     * further so that the substring can be appended without exceeding the
     * desired length.
     *
     * INFO: For an empty $str or a $length <= 0, $substring alone is returned.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default: ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // the string already fits
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // reserve room for the substring
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $truncated = self::substr($str, 0, $length, $encoding);
            if ($truncated === false) {
                return '';
            }

            // a space exactly at the cut position means no word was split
            $space_position = self::strpos($str, ' ', $length - 1, $encoding);
            if ($space_position === $length) {
                return $truncated . $substring;
            }

            // otherwise cut back to the last space inside the truncated part
            $last_position = self::strrpos($truncated, ' ', 0, $encoding);
            $cut_back = $last_position !== false
                        ||
                        ($space_position !== false && !$ignore_do_not_split_words_for_one_word);
            if ($cut_back) {
                $truncated = (string) self::substr($truncated, 0, (int) $last_position, $encoding);
            }

            return $truncated . $substring;
        }

        // the string already fits
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // reserve room for the substring
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $truncated - needed for PhpStan (stubs error) */
        $truncated = \mb_substr($str, 0, $length);
        if ($truncated === false) {
            return '';
        }

        // a space exactly at the cut position means no word was split
        $space_position = \mb_strpos($str, ' ', $length - 1);
        if ($space_position === $length) {
            return $truncated . $substring;
        }

        // otherwise cut back to the last space inside the truncated part
        $last_position = \mb_strrpos($truncated, ' ', 0);
        $cut_back = $last_position !== false
                    ||
                    ($space_position !== false && !$ignore_do_not_split_words_for_one_word);
        if ($cut_back) {
            $truncated = (string) \mb_substr($truncated, 0, (int) $last_position);
        }

        return $truncated . $substring;
    }
8915
8916
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: This is UTF8::str_delimit() with "_" as delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8933
8934
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: The string is passed through UTF8::str_camelize() first, then its
     *       first character is upper-cased via UTF8::ucfirst().
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // only $str and $encoding are handed to the camelize step
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8961
8962
    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string, or the words themselves for format 1 and 2.</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the loops below read the words from the odd indexes of this list and
        // treat the even indexes as the text between the words
        $str_parts = self::str_to_words($str, $char_list);

        $len = \count($str_parts);

        if ($format === 1) {
            $number_of_words = [];
            for ($i = 1; $i < $len; $i += 2) {
                $number_of_words[] = $str_parts[$i];
            }
        } elseif ($format === 2) {
            $number_of_words = [];
            // the first word starts after the leading non-word part
            $offset = (int) self::strlen($str_parts[0] ?? '');
            for ($i = 1; $i < $len; $i += 2) {
                $number_of_words[$offset] = $str_parts[$i];
                // skip the word itself plus the non-word part that follows it;
                // a missing trailing part counts as empty instead of raising an "undefined index"
                $offset += (int) self::strlen($str_parts[$i]) + (int) self::strlen($str_parts[$i + 1] ?? '');
            }
        } else {
            $number_of_words = (int) (($len - 1) / 2);
        }

        return $number_of_words;
    }
9019
9020
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(): both strings are
     *       case-folded via UTF8::strtocasefold() and then compared.
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_str1, $folded_str2);
    }
9062
9063
    /**
     * Case-sensitive string comparison.
     *
     * INFO: Both strings are normalized to NFD before they are compared, so
     *       canonically equivalent strings are treated as equal. If one of the
     *       strings cannot be normalized, the raw bytes are compared instead.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized_str1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized_str2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" returns false on failure; comparing those
        // results would treat two different strings as equal, so fall back to
        // a plain byte comparison of the original input
        if ($normalized_str1 === false || $normalized_str2 === false) {
            return \strcmp($str1, $str2);
        }

        return \strcmp($normalized_str1, $normalized_str2);
    }
9091
9092
    /**
     * Find length of initial segment not matching mask.
     *
     * INFO: If $offset and / or $length are given, only that part of $str is
     *       searched. An empty $char_list returns the length of the whole string.
     *
     * @param string   $str
     * @param string   $char_list
     * @param int      $offset
     * @param int|null $length
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask, the whole string is the "not matching" segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // reduce the string to the requested window first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str_window = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str_window = \mb_substr($str, $offset);
            } else {
                $str_window = \mb_substr($str, $offset, $length);
            }

            if ($str_window === false) {
                return 0;
            }

            $str = $str_window;
        }

        if ($str === '') {
            return 0;
        }

        // capture (non-greedy) everything in front of the first char from the mask
        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            // no char from the mask found: the whole string counts
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($found[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
9154
9155
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * INFO: Every value is cast to (int) and turned into a numeric HTML
     *       entity, which is then decoded via UTF8::html_entity_decode().
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // a single code point is handled like a list with one entry
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        $entities = '';
        foreach ($code_points as $code_point) {
            $entities = $entities . '&#' . (int) $code_point . ';';
        }

        return self::html_entity_decode($entities, \ENT_QUOTES | \ENT_HTML5);
    }
9188
9189
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * INFO: The start of the string is compared with every entry of
     *       self::$BOM (BOM string => byte length).
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function string_has_bom(string $str): bool
    {
        // iterate by value: the BOM table is only read here, a by-reference loop
        // would turn the entries of the shared static array into references
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9214
9215
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    Tags which should not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        $input = $clean_utf8 ? self::clean($str) : $str;

        // only hand the second argument over when the caller provided one
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9259
9260
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str !== '') {
            $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

            // the cast turns a failed replacement (null) into an empty string
            return (string) $stripped;
        }

        return '';
    }
9281
9282
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // an empty needle is only "found" (at position 0) since PHP 8
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        $use_grapheme = $encoding === 'UTF-8'
                        &&
                        $offset >= 0
                        &&
                        self::$SUPPORT['intl'] === true;
        if ($use_grapheme) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings, then search case-sensitive
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9371
9372
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // an empty needle is only "found" since PHP 8
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            return $is_php8 ? $haystack : false;
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stristr($haystack, $needle, $before_needle, $encoding);
            }

            return \mb_stristr($haystack, $needle, $before_needle);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_stristr()" can't handle other encodings
        $use_grapheme = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
        if ($use_grapheme) {
            $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture (non-greedy) everything in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        $part_before_needle = $match[1] ?? null;
        if ($part_before_needle === null) {
            return false;
        }

        if ($before_needle) {
            return $part_before_needle;
        }

        // everything from the needle on: skip as many chars as were found in front of it
        $needle_position = (int) self::strlen($part_before_needle, $encoding);

        return self::substr($haystack, $needle_position, null, $encoding);
    }
9477
9478
    /**
     * Get the string length, not the byte-length!
     *
     * INFO: use UTF8::strwidth() for the char-length
     *
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only: one byte is one char
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        // warn if neither mbstring nor iconv can take care of the encoding
        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        $use_grapheme = $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true;
        if ($use_grapheme) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the chars matched by the regex
        //

        \preg_match_all('/./us', $str, $chars);

        $char_count = \count($chars[0]);

        // nothing matched, although the string is not empty
        return $char_count === 0 ? false : $char_count;
    }
9608
9609
    /**
     * Get string length in byte.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
9631
9632
    /**
9633
     * Case-insensitive string comparisons using a "natural order" algorithm.
9634
     *
9635
     * INFO: natural order version of UTF8::strcasecmp()
9636
     *
9637
     * EXAMPLES: <code>
9638
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
9639
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9640
     *
9641
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9642
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9643
     * </code>
9644
     *
9645
     * @param string $str1     <p>The first string.</p>
9646
     * @param string $str2     <p>The second string.</p>
9647
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9648
     *
9649
     * @psalm-pure
9650
     *
9651
     * @return int
9652
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9653
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9654
     *             <strong>0</strong> if they are equal
9655
     */
9656 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands with identical settings, then hand the
        // folded strings to the case-sensitive natural-order comparison.
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_first, $folded_second);
    }
9663
9664
    /**
9665
     * String comparisons using a "natural order" algorithm
9666
     *
9667
     * INFO: natural order version of UTF8::strcmp()
9668
     *
9669
     * EXAMPLES: <code>
9670
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
9671
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9672
     *
9673
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9674
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9675
     * </code>
9676
     *
9677
     * @see http://php.net/manual/en/function.strnatcmp.php
9678
     *
9679
     * @param string $str1 <p>The first string.</p>
9680
     * @param string $str2 <p>The second string.</p>
9681
     *
9682
     * @psalm-pure
9683
     *
9684
     * @return int
9685
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9686
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
9687
     *             <strong>0</strong> if they are equal
9688
     */
9689 4
    public static function strnatcmp(string $str1, string $str2): int
    {
        // Only non-identical strings need the (more expensive) folding step.
        if ($str1 !== $str2) {
            $left = (string) self::strtonatfold($str1);
            $right = (string) self::strtonatfold($str2);

            return \strnatcmp($left, $right);
        }

        // byte-identical input is equal by definition
        return 0;
    }
9700
9701
    /**
9702
     * Case-insensitive string comparison of the first n characters.
9703
     *
9704
     * EXAMPLE: <code>
9705
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
9706
     * </code>
9707
     *
9708
     * @see http://php.net/manual/en/function.strncasecmp.php
9709
     *
9710
     * @param string $str1     <p>The first string.</p>
9711
     * @param string $str2     <p>The second string.</p>
9712
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
9713
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9714
     *
9715
     * @psalm-pure
9716
     *
9717
     * @return int
9718
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9719
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9720
     *             <strong>0</strong> if they are equal
9721
     */
9722 2
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Fold the case of both inputs first; the length-limited comparison
        // itself is delegated to strncmp() (called with its default encoding).
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded_first, $folded_second, $len);
    }
9734
9735
    /**
9736
     * String comparison of the first n characters.
9737
     *
9738
     * EXAMPLE: <code>
9739
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
9740
     * </code>
9741
     *
9742
     * @see http://php.net/manual/en/function.strncmp.php
9743
     *
9744
     * @param string $str1     <p>The first string.</p>
9745
     * @param string $str2     <p>The second string.</p>
9746
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
9747
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9748
     *
9749
     * @psalm-pure
9750
     *
9751
     * @return int
9752
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9753
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9754
     *             <strong>0</strong> if they are equal
9755
     */
9756 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // "UTF-8" and "CP850" are used verbatim, anything else is normalized.
        $is_canonical_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_canonical_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut both strings down to their first $len characters ...
        if ($encoding !== 'UTF-8') {
            $head_first = (string) self::substr($str1, 0, $len, $encoding);
            $head_second = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head_first = (string) \mb_substr($str1, 0, $len);
            $head_second = (string) \mb_substr($str2, 0, $len);
        }

        // ... and compare only those prefixes.
        return self::strcmp($head_first, $head_second);
    }
9776
9777
    /**
9778
     * Search a string for any of a set of characters.
9779
     *
9780
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
9781
     *
9782
     * @see http://php.net/manual/en/function.strpbrk.php
9783
     *
9784
     * @param string $haystack  <p>The string where char_list is looked for.</p>
9785
     * @param string $char_list <p>This parameter is case-sensitive.</p>
9786
     *
9787
     * @psalm-pure
9788
     *
9789
     * @return false|string
9790
     *                      <p>The string starting from the character found, or false if it is not found.</p>
9791
     */
9792 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ('' === $haystack || '' === $char_list) {
            return false;
        }

        // Turn the character list into a regex class and look for its first hit.
        $pattern = '/' . self::rxClass($char_list) . '/us';
        $has_match = (bool) \preg_match($pattern, $haystack, $match);
        if (!$has_match) {
            return false;
        }

        // Locate the matched text by byte position and return the tail from there.
        $byte_offset = (int) \strpos($haystack, $match[0]);

        return \substr($haystack, $byte_offset);
    }
9804
9805
    /**
9806
     * Find the position of the first occurrence of a substring in a string.
9807
     *
9808
     * INFO: use UTF8::strpos_in_byte() for the byte-length
9809
     *
9810
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
9811
     *
9812
     * @see http://php.net/manual/en/function.mb-strpos.php
9813
     *
9814
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
9815
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
9816
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
9817
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9818
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9819
     *
9820
     * @psalm-pure
9821
     *
9822
     * @return false|int
9823
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
9824
     *                   string.<br> If needle is not found it returns false.
9825
     */
9826 52
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Before PHP 8 an empty haystack never matches, whatever the needle is.
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // Only reachable on PHP >= 8: an empty needle is reported at position 0.
            return $needle === '' ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Search only the part of the haystack that starts at $offset ...
        $haystack_part = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_part === false) {
            $haystack_part = '';
        }
        $haystack_part = (string) $haystack_part;

        $pos = \strpos($haystack_part, $needle);
        if ($pos === false) {
            return false;
        }

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack. Translate it
            // into the absolute character index where the searched part begins, so
            // the result is relative to the full haystack (as with mb_strpos()).
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        // ... then convert the byte position inside that part into a character
        // count and shift it by the start of the part.
        $chars_before_match = $pos > 0
            ? (int) self::strlen(\substr($haystack_part, 0, $pos), $encoding)
            : 0;

        return $offset + $chars_before_match;
    }
9978
9979
    /**
9980
     * Find the position of the first occurrence of a substring in a string.
9981
     *
9982
     * @param string $haystack <p>
9983
     *                         The string being checked.
9984
     *                         </p>
9985
     * @param string $needle   <p>
9986
     *                         The string to find in haystack.
9987
     *                         </p>
9988
     * @param int    $offset   [optional] <p>
9989
     *                         The search offset. If it is not specified, 0 is used.
9990
     *                         </p>
9991
     *
9992
     * @psalm-pure
9993
     *
9994
     * @return false|int
9995
     *                   <p>The numeric position of the first occurrence of needle in the
9996
     *                   haystack string. If needle is not found, it returns false.</p>
9997
     */
9998 2
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // Nothing can be located when either operand is empty.
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        // Plain byte search unless the "mbstring_func_overload" flag is set.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used; "CP850" is a single-byte
        // charset (8-BIT), so positions remain byte positions.
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
10011
10012
    /**
10013
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10014
     *
10015
     * @param string $haystack <p>
10016
     *                         The string being checked.
10017
     *                         </p>
10018
     * @param string $needle   <p>
10019
     *                         The string to find in haystack (compared case-insensitively).
10020
     *                         </p>
10021
     * @param int    $offset   [optional] <p>
10022
     *                         The search offset. If it is not specified, 0 is used.
10023
     *                         </p>
10024
     *
10025
     * @psalm-pure
10026
     *
10027
     * @return false|int
10028
     *                   <p>The numeric position of the first occurrence of needle in the
10029
     *                   haystack string. If needle is not found, it returns false.</p>
10030
     */
10031 2
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // Nothing can be located when either operand is empty.
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        // Plain case-insensitive byte search unless the overload flag is set.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \stripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used; "CP850" is a single-byte
        // charset (8-BIT), so positions remain byte positions.
        return \mb_stripos($haystack, $needle, $offset, 'CP850');
    }
10044
10045
    /**
10046
     * Find the last occurrence of a character in a string within another.
10047
     *
10048
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
10049
     *
10050
     * @see http://php.net/manual/en/function.mb-strrchr.php
10051
     *
10052
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10053
     * @param string $needle        <p>The string to find in haystack</p>
10054
     * @param bool   $before_needle [optional] <p>
10055
     *                              Determines which portion of haystack
10056
     *                              this function returns.
10057
     *                              If set to true, it returns all of haystack
10058
     *                              from the beginning to the last occurrence of needle.
10059
     *                              If set to false, it returns all of haystack
10060
     *                              from the last occurrence of needle to the end,
10061
     *                              </p>
10062
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10063
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10064
     *
10065
     * @psalm-pure
10066
     *
10067
     * @return false|string
10068
     *                      <p>The portion of haystack or false if needle is not found.</p>
10069
     */
10070 2
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        $is_canonical_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_canonical_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1) preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only: the native function is sufficient
        //    (only for the default "return the tail" mode)
        //

        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) iconv or vanilla php: both search for the FIRST character of the
        //    needle only and differ solely in how the position is determined
        //

        $first_needle_char = self::substr($needle, 0, 1, $encoding);
        if ($first_needle_char === false) {
            return false;
        }

        $last_pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $first_needle_char, $encoding)
            : self::strrpos($haystack, $first_needle_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        // Either everything in front of the hit, or the hit plus the rest.
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10175
10176
    /**
10177
     * Reverses characters order in the string.
10178
     *
10179
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
10180
     *
10181
     * @param string $str      <p>The input string.</p>
10182
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10183
     *
10184
     * @psalm-pure
10185
     *
10186
     * @return string
10187
     *                <p>The string with characters in the reverse sequence.</p>
10188
     */
10189 10
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ('' === $str) {
            return '';
        }

        // Emoji are encoded before the reversal and decoded again afterwards
        // NOTE(review): presumably so that they are not torn apart while the
        // string is walked unit by unit - confirm against emoji_encode().
        $encoded = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            // generic path: walk the string backwards through the class helpers
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($encoded, $encoding) - 1; $index >= 0; --$index) {
                $unit = self::substr($encoded, $index, 1, $encoding);
                if ($unit !== false) {
                    $result .= $unit;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($encoded) - 1; $index >= 0; --$index) {
                $unit = \grapheme_substr($encoded, $index, 1);
                if ($unit !== false) {
                    $result .= $unit;
                }
            }
        } else {
            // without intl: fall back to the mbstring functions
            for ($index = (int) \mb_strlen($encoded) - 1; $index >= 0; --$index) {
                $unit = \mb_substr($encoded, $index, 1);
                if ($unit !== false) {
                    $result .= $unit;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10233
10234
    /**
10235
     * Find the last occurrence of a character in a string within another, case-insensitive.
10236
     *
10237
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
10238
     *
10239
     * @see http://php.net/manual/en/function.mb-strrichr.php
10240
     *
10241
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10242
     * @param string $needle        <p>The string to find in haystack.</p>
10243
     * @param bool   $before_needle [optional] <p>
10244
     *                              Determines which portion of haystack
10245
     *                              this function returns.
10246
     *                              If set to true, it returns all of haystack
10247
     *                              from the beginning to the last occurrence of needle.
10248
     *                              If set to false, it returns all of haystack
10249
     *                              from the last occurrence of needle to the end,
10250
     *                              </p>
10251
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10252
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10253
     *
10254
     * @psalm-pure
10255
     *
10256
     * @return false|string
10257
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
10258
     */
10259 3
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ('' === $haystack || '' === $needle) {
            return false;
        }

        $is_canonical_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_canonical_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php: only the FIRST character of the needle is
        // searched for (case-insensitively, last occurrence)
        //

        $first_needle_char = self::substr($needle, 0, 1, $encoding);
        if ($first_needle_char === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, $first_needle_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        // Either everything in front of the hit, or the hit plus the rest.
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10314
10315
    /**
10316
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
10317
     *
10318
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
10319
     *
10320
     * @param string     $haystack   <p>The string to look in.</p>
10321
     * @param int|string $needle     <p>The string to look for.</p>
10322
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
10323
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10324
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10325
     *
10326
     * @psalm-pure
10327
     *
10328
     * @return false|int
10329
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
10330
     *                   string.<br>If needle is not found, it returns false.</p>
10331
     */
10332 14
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Before PHP 8 an empty haystack never matches, whatever the needle is.
        if ('' === $haystack && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ('' === $haystack) {
            // Only reachable on PHP >= 8: an empty needle is reported at position 0.
            return $needle === '' ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $is_canonical_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_canonical_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1) preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // 2) binary || ascii only: the native function is sufficient
        //

        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte) {
            return \strripos($haystack, $needle, $offset);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl
        //

        $can_use_intl = $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            && $offset >= 0 // grapheme_strripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;
        if ($can_use_intl) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
            // "not found" deliberately falls through to the remaining fallbacks
        }

        //
        // 4) ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // 5) vanilla php: case-fold both sides, then do a case-sensitive search
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
10446
10447
    /**
     * Finds the position of the last occurrence of a string within another, case-insensitive
     * (8-bit variant: delegates to the native "strripos()", or to "mb_strripos()" with the
     * "CP850" charset when "mbstring.func_overload" is flagged as active).
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (an empty haystack or an
     *                   empty needle always results in false).</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // an empty haystack or an empty needle can never produce a position
        $searchable = $haystack !== '' && $needle !== '';
        if (!$searchable) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strripos($haystack, $needle, $offset);
    }
10481
10482
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * The lookup is delegated to the first usable backend, in this order: mbstring,
     * native "strrpos()" (for "CP850" / "ASCII"), intl ("grapheme_strrpos()"),
     * native "strrpos()" for pure ASCII input and finally a plain PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // early exit for an empty haystack (PHP >= 8 reports position 0 for an empty needle)
        if ($haystack === '') {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle, so convert a code point into its character
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // re-check the empty cases, now that the needle is guaranteed to be a string
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = $offset >= 0 // grapheme_strrpos() can't handle negative offset
                       &&
                       $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
                       &&
                       self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // cut the haystack down to the part that is allowed to be searched
        $window = null;
        if ($offset > 0) {
            $window = self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            $window = self::substr($haystack, 0, $offset);
            $offset = 0;
        }

        if ($window !== null) {
            $haystack = $window === false ? '' : (string) $window;
        }

        // byte position of the last match inside the (maybe shortened) haystack
        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        // convert the byte position into a length via UTF8::strlen() and re-add the skipped offset
        return $offset + (int) self::strlen($prefix);
    }
10641
10642
    /**
     * Find the position of the last occurrence of a substring in a string
     * (8-bit variant: delegates to the native "strrpos()", or to "mb_strrpos()" with the
     * "CP850" charset when "mbstring.func_overload" is flagged as active).
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found (or if haystack or needle
     *                   is empty), it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // an empty haystack or an empty needle can never produce a position
        $searchable = $haystack !== '' && $needle !== '';
        if (!$searchable) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strrpos($haystack, $needle, $offset);
    }
10676
10677
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // '3'</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start of the part of the string that is examined.</p>
     * @param int|null $length   [optional] <p>Length of the part of the string that is examined.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Length of the leading segment that matches the mask; 0 if the (sliced) string
     *                   or the mask is empty, or if nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // narrow the input down to the requested window before matching
        $has_window = $offset || $length !== null;
        if ($has_window) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest run of mask characters anchored at the start of the string
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
10724
10725
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * The lookup is delegated to the first usable backend, in this order: mbstring,
     * native "strstr()" (for "CP850" / "ASCII"), intl ("grapheme_strstr()"),
     * native "strstr()" for pure ASCII input and finally a regex based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // PHP >= 8 treats an empty needle as "found at the start"
        $empty_needle_matches = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            return ($empty_needle_matches && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            return $empty_needle_matches ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $parts);

        if (!isset($parts[1])) {
            return false;
        }

        $head = $parts[1];

        return $before_needle
            ? $head
            : self::substr($haystack, (int) self::strlen($head));
    }
10858
10859
    /**
     * Finds first occurrence of a string within another
     * (8-bit variant: delegates to the native "strstr()", or to "mb_strstr()" with the
     * "CP850" charset when "mbstring.func_overload" is flagged as active).
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found (an empty haystack or an
     *                      empty needle always results in false).</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // an empty haystack or an empty needle can never produce a match
        $searchable = $haystack !== '' && $needle !== '';
        if (!$searchable) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850') // 8-BIT
            : \strstr($haystack, $needle, $before_needle);
    }
10900
10901
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * The string is first passed through UTF8::fixStrCaseHelper() and afterwards
     * converted to lowercase or uppercase, depending on "$lower".
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid UTF-8 sequences before any case conversion takes place
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // without a language and with plain UTF-8 the "mb_" functions can be called directly
        $plain_utf8 = $lang === null && $encoding === 'UTF-8';

        if ($lower) {
            return $plain_utf8
                ? \mb_strtolower($str)
                : self::strtolower($str, $encoding, false, $lang);
        }

        return $plain_utf8
            ? \mb_strtoupper($str)
            : self::strtoupper($str, $encoding, false, $lang);
    }
10957
10958
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr (needs intl, otherwise a warning is triggered)</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid UTF-8 sequences before the case conversion takes place
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        $has_lang = $lang !== null;

        // fast path: plain UTF-8 without any language specific handling
        if (!$has_lang && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // language specific lowercasing via the intl transliterator
        if ($has_lang && self::$SUPPORT['intl'] === true) {
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            $transliterator_id = $lang . '-Lower';
            if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
                /**
                 * @psalm-suppress ImpureFunctionCall - this is only a warning
                 */
                \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                // unknown language: use the generic transliterator instead
                $transliterator_id = 'Any-Lower';
            }

            return (string) \transliterator_transliterate($transliterator_id, $str);
        }

        if ($has_lang) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
11037
11038
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr (needs intl, otherwise a warning is triggered)</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length (the
     *                                                   string is passed through UTF8::fixStrCaseHelper() first)</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid UTF-8 sequences before the case conversion takes place
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without any language specific handling
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown:
                    // report that (same wording as UTF8::strtolower()) and use the generic transliterator
                    /**
                     * @psalm-suppress ImpureFunctionCall - this is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    $language_code = 'Any-Upper';
                }

                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11117
11118
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * If "$to" is given, "$from" and "$to" are split into characters (unless they are already arrays),
     * the longer list is truncated to the length of the shorter one and both are combined into a
     * "from => to" map. Without "$to", a string "$from" is removed from the input and an array "$from"
     * is used directly as replacement map.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if "$from" and "$to" cannot be combined into a replacement map
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change if every char is replaced by itself
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // "array_combine()" needs lists of equal length, so truncate the longer one
            $count_from = \count($from);
            $count_to = \count($to);

            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep "$from" untouched until the result was checked, so that the
            // exception message shows the real input instead of "false"
            $replace_pairs = \array_combine($from, $to);
            if ($replace_pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $replace_pairs;
        }

        // only reachable with a string if "$to" is empty: remove "$from" from the input
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        return \strtr($str, $from);
    }
11184
11185
    /**
     * Return the width of a string.
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn")); // 21</code>
     *
     * With mbstring the result comes from "mb_strwidth()"; otherwise every character matched by
     * the wide-character ranges below counts twice and the rest is measured via UTF8::strlen().
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the regex below works on UTF-8 only, so convert other charsets first
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide chars and remember how many of them were removed
        $wide_chars = 0;
        $narrow_only = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_chars);

        // every wide char counts twice, the remaining chars as measured by UTF8::strlen()
        return ($wide_chars * 2) + (int) self::strlen($narrow_only);
    }
11244
11245
    /**
     * Get part of a string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty
     *                      input, a zero length or an offset behind the end of the string.</p>
     *                      <p><b>FALSE</b> is returned if the string length cannot be determined
     *                      (e.g. invalid chars and mbstring is not installed).</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string
        // INFO: "$length" can't be 0 at this point (handled above), so check for null explicitly
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        $length = $length ?? $str_length;

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // pass the encoding explicitly, otherwise "iconv.internal_encoding" is
            // used and the offsets are wrong for non UTF-8 input
            $return_tmp = \iconv_substr($str, $offset, $length, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        // &&
        // extract relevant part, and join to make sting again
        return \implode('', \array_slice(self::str_split($str), $offset, $length));
    }
11403
11404
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // only cut the strings if a window was requested
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                // compare only as many chars as are left in the main string
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // the length must be measured in the same encoding that is used for cutting
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
11465
11466
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. A negative length is counted from the end of the
     *                             haystack.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or false if the needle is empty or the
     *                   length of the haystack cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // nothing to search in
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // cut the haystack down to the requested window
        // INFO: "$length" can't be 0 here, so "!== null" also covers a negative length
        if ($offset || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the non-overlapping regex matches
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
11563
11564
    /**
     * Count the number of substring occurrences, processed in bytes.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string, 0 if the haystack or the needle is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                // output from "substr_count()" have changed in PHP 7.1
                // INFO: PHP_VERSION_ID is "major * 10000 + minor * 100 + release", so 7.1.0 is 70100
                \PHP_VERSION_ID < 70100
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
11645
11646
    /**
     * Count how often $substring occurs in the given string.
     *
     * The comparison is case-sensitive by default, set $case_sensitive to false
     * for a case-insensitive count.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences, 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // every other encoding name is normalized first and handed over to mbstring
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        // fast path for UTF-8: compare the upper-cased variants if the case doesn't matter
        if (!$case_sensitive) {
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
11693
11694
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), ignoring the case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                doesn't start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip: an empty haystack stays empty, an empty needle changes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many chars as the needle is long
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
11726
11727
    /**
     * Get part of a string process in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string,
     *                         null means "up to the end of the string".</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, an empty string for an empty input or a
     *                      zero length. Whatever the underlying "substr()" / "mb_substr()" call
     *                      returns is passed through, which may be <b>FALSE</b>.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // INFO: don't pass a magic "max length" here, just omit the length so that
        //       the rest of the string is returned regardless of its size
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
11760
11761
    /**
     * Strip a suffix ($needle) from the end of the string ($haystack), ignoring the case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it
     *                doesn't end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip: an empty haystack stays empty, an empty needle changes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
11793
11794
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), case-sensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it
     *                doesn't start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip: an empty haystack stays empty, an empty needle changes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many chars as the needle is long
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
11826
11827
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.<br><br>
     *                                     INFO: if <i>str</i> is an array and no length is given, a length of 0
     *                                     is used for every element (the replacement is inserted).</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if "$offset" or "$length" is an array, but "$str" is a string
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     *
     * @template TSubstrReplace
     * @phpstan-param TSubstrReplace $str
     * @phpstan-return TSubstrReplace
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer offsets fall back to 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // non-integer lengths fall back to the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call
            // INFO: a closure is used so that the requested encoding is passed on to
            //       every element, instead of silently falling back to the default
            /** @phpstan-ignore-next-line - phpstan currently can't handle recursive calls */
            return \array_map(
                static function ($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp) use ($encoding) {
                    return self::substr_replace($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single string can only take a single replacement: use the first one
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into the range [0, string length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, no length means "up to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // part before the offset + replacement + part behind the replaced range
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into arrays of single chars
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
11985
11986
    /**
     * Strips $needle from the end of $haystack, if $haystack ends with it.
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove (compared byte-wise, therefore case-sensitive).</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with
     *                the needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in, or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // the suffix comparison is done on raw bytes
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            // fast path: call the mb-functions directly
            $chars_to_keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $chars_to_keep);
        }

        $chars_to_keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $chars_to_keep, $encoding);
    }
12035
12036
    /**
     * Returns a case swapped version of the string.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * INFO: The fast path XORs the lower-cased, upper-cased and original string byte by byte. That is only
     * valid while all three strings have the same byte length; if a case mapping changes the byte length
     * (e.g. "ı" -> "I"), the string is swapped character by character instead.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $lower = (string) \mb_strtolower($str);
            $upper = (string) \mb_strtoupper($str);
        } else {
            $lower = (string) self::strtolower($str, $encoding);
            $upper = (string) self::strtoupper($str, $encoding);
        }

        // XOR of strings is cut to the shortest operand, so it is only trustworthy
        // when neither case conversion changed the byte length
        $xor_swapped = (string) ($lower ^ $upper ^ $str);
        $byte_length = \strlen($str);
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            return $xor_swapped;
        }

        // slow path: split into single characters ...
        if ($is_utf8) {
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            if ($chars === false) {
                // the input is not valid UTF-8, keep the result of the byte-wise approach
                return $xor_swapped;
            }
        } else {
            $chars = [];
            $char_count = (int) self::strlen($str, $encoding);
            for ($i = 0; $i < $char_count; ++$i) {
                $chars[] = (string) self::substr($str, $i, 1, $encoding);
            }
        }

        // ... and swap every character on its own
        $swapped = '';
        foreach ($chars as $char) {
            $lower_char = $is_utf8 ? (string) \mb_strtolower($char) : (string) self::strtolower($char, $encoding);
            if ($lower_char !== $char) {
                // the character was not lower-case -> use the lower-case variant
                $swapped .= $lower_char;

                continue;
            }

            // lower-case or caseless character -> the upper-case variant (identical for caseless chars)
            $swapped .= $is_utf8 ? (string) \mb_strtoupper($char) : (string) self::strtoupper($char, $encoding);
        }

        return $swapped;
    }
12068
12069
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when a function of an extension exists ("mb_strlen" / "iconv"),
     * although the extension itself ("mbstring" / "iconv") is not loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        // extension name => function that is provided by this extension
        $probes = [
            'mbstring' => 'mb_strlen',
            'iconv'    => 'iconv',
        ];

        foreach ($probes as $extension => $function) {
            if (!\extension_loaded($extension) && \function_exists($function)) {
                return true;
            }
        }

        return false;
    }
12096
12097
    /**
     * Replaces every tab character with $tab_length spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces used for one tab.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indentation = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indentation, $str);
    }
12117
12118
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without the "keep length" option the work is done by "mb_convert_case()",
     * otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // language specific rules or length preservation need the slower helper
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
12172
12173
    /**
     * Convert a string into ASCII (delegates to "ASCII::to_transliterate()").
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = ASCII::to_transliterate($str, $unknown, $strict);

        return $ascii;
    }
12194
12195
    /**
     * Interprets the given value as boolean.
     *
     * <ul>
     * <li>"true", "1", "on", "yes" (any letter case) => true</li>
     * <li>"false", "0", "off", "no" (any letter case) and the empty string => false</li>
     * <li>other numeric strings => true, if the number is greater than zero</li>
     * <li>everything else => true, unless the string is empty after "trim()"</li>
     * </ul>
     *
     * @param bool|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $known_values = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // try the exact value first, then the lower-cased value
        $known = $known_values[$value] ?? $known_values[\strtolower($value)] ?? null;
        if ($known !== null) {
            return $known;
        }

        return \is_numeric($value)
            ? ((float) $value) > 0
            : (bool) \trim($value);
    }
12238
12239
    /**
     * Convert given string to safe filename (and keep string case); delegates to "ASCII::to_filename()".
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(string $str, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
12262
12263
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), the keys are kept.
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @template TToIso8859
     * @phpstan-param TToIso8859 $str
     * @phpstan-return TToIso8859
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            $converted = [];
            foreach ($str as $key => $item) {
                $converted[$key] = self::to_iso8859($item);
            }

            return $converted;
        }

        $string = (string) $str;

        return $string === '' ? '' : self::utf8_decode($string);
    }
12295
12296
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * <li>Every element of an array is passed to "UTF8::to_utf8_string()", the keys are kept.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @phpstan-param TToUtf8 $str
     * @phpstan-return TToUtf8
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (\is_array($str)) {
            $converted = [];
            foreach ($str as $key => $item) {
                $converted[$key] = self::to_utf8_string($item, $decode_html_entity_to_utf8);
            }

            /** @phpstan-var TToUtf8 $converted */
            return $converted;
        }

        /** @phpstan-var TToUtf8 $converted */
        $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

        return $converted;
    }
12336
12337
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string</p>
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $result = '';

        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $lead = $str[$pos];
            $lead_code = \ord($lead);

            // 7-bit byte: it doesn't need conversion
            if ($lead_code < 0x80) {
                $result .= $lead;

                continue;
            }

            // how many continuation bytes does the lead byte announce?
            if ($lead_code >= 0xC0 && $lead_code <= 0xDF) {
                $expected = 1; // looks like 2 bytes UTF8
            } elseif ($lead_code >= 0xE0 && $lead_code <= 0xEF) {
                $expected = 2; // looks like 3 bytes UTF8
            } elseif ($lead_code >= 0xF0 && $lead_code <= 0xF7) {
                $expected = 3; // looks like 4 bytes UTF8
            } else {
                $expected = 0; // stray continuation byte (0x80-0xBF) or 0xF8-0xFF: never a valid lead byte
            }

            // the sequence is accepted only if all announced bytes exist and are in the range 0x80-0xBF
            $looks_like_utf8 = $expected > 0 && $pos + $expected < $byte_count;
            if ($looks_like_utf8) {
                for ($offset = 1; $offset <= $expected; ++$offset) {
                    if ((\ord($str[$pos + $offset]) & 0xC0) !== 0x80) {
                        $looks_like_utf8 = false;

                        break;
                    }
                }
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already: copy the whole sequence and skip its continuation bytes
                $result .= \substr($str, $pos, $expected + 1);
                $pos += $expected;
            } else {
                // not valid UTF8 - convert the single byte
                $result .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $decoded = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $code_point = ((int) \hexdec($matches[1]) << 10)
                                  + (int) \hexdec($matches[2])
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point >= 0xA0) {
                    return self::decimal_to_chr($code_point);
                }

                if ($code_point >= 0x80) {
                    // 0x80-0x9F: build the two bytes by hand
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6)) . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return (string) self::chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" returns null on error
        if ($decoded === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8) {
            $decoded = self::html_entity_decode($decoded);
        }

        return $decoded;
    }
12468
12469
    /**
     * Casts a numeric string to an integer.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>null if the string isn't numeric</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
12487
12488
    /**
     * Returns the given input as string, or null if the input isn't int|float|string
     * and does not implement the "__toString()" method.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        // strings and numbers can be cast directly
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects only, if they know how to represent themselves as string
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // null and every other type
        return null;
    }
12526
12527
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null strips whitespace,
     *                           an empty string strips nothing (like the native "trim()").</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if ($chars === '') {
            // an empty list would end up as the empty character class "[]" in the pattern
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // the fallback runs via "regex_replace()", so the "/" is quoted there as well
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');

            // plain concatenation: the "${var}" interpolation is deprecated since PHP 8.2
            $pattern = '^[' . $quoted_chars . ']+|[' . $quoted_chars . ']+$';
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if ($use_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
12572
12573
    /**
     * Makes string's first char uppercase.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with the first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // without special language rules the plain "mb_strtoupper()" is good enough
        $plain_uppercase = $lang === null && !$try_to_keep_the_string_length;

        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // split the string into the first character and the rest
        if ($is_utf8) {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);
        } else {
            $first_char = $plain_uppercase
                ? (string) \mb_substr($str, 0, 1, $encoding)
                : (string) self::substr($str, 0, 1, $encoding);
            $remainder = (string) self::substr($str, 1, null, $encoding);
        }

        // uppercase the first character
        if (!$plain_uppercase) {
            $first_char_upper = self::strtoupper(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        } elseif ($is_utf8) {
            $first_char_upper = \mb_strtoupper($first_char);
        } else {
            $first_char_upper = \mb_strtoupper($first_char, $encoding);
        }

        return $first_char_upper . $remainder;
    }
12649
12650
    /**
     * Uppercase for all words in the string.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * INFO: The string "0" is handled like any other word, it is neither treated as empty input nor dropped.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: "0" is a valid input and must not be handled as empty string
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the native function is only an option without extra word-chars and without exceptions
        $use_php_default_functions = $char_list === '' && \implode('', $exceptions) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty parts only, a "0" is a regular word
            if ((string) $word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                // excluded words are kept as they are
                $words_str .= $word;
            } else {
                $words_str .= self::ucfirst($word, $encoding);
            }
        }

        return $words_str;
    }
12718
12719
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $decoded = self::urldecode_unicode_helper($str);

        // one decoding pass is always done; with $multi_decode the passes are
        // repeated until the string does not change anymore
        do {
            $before_pass = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($decoded),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $decoded);

        return self::fix_simple_utf8($decoded);
    }
12778
12779
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * <ul>
     * <li>2-byte sequences are decoded, code points above 255 become "?".</li>
     * <li>3-byte and 4-byte sequences always become "?".</li>
     * <li>A 2-byte lead byte at the very end of the string (truncated sequence) becomes "?".</li>
     * <li>All other bytes are copied unchanged.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>Return the untouched input, if the decoded string is not shorter
     *                                (measured with "UTF8::strlen()") than the input.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // lazy load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';

        // the string is rewritten in place: $i is the read position, $j the write position ($j <= $i)
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // truncated 2-byte sequence: there is no second byte to read
                        $str[$j] = $no_char_found;

                        break;
                    }

                    // 2-byte sequence: combine the payload bits of both bytes
                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                case "\xF0":
                    // 4-byte sequence: one byte more to skip than for 3-byte sequences
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: not representable, skip the continuation bytes
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        // cut off the leftover bytes behind the write position
        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12851
12852
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The encoded string; an empty string for empty input or when the encoder returns false.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        return $encoded === false ? '' : $encoded;
    }
12878
12879
    /**
     * Returns the table of all known utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12894
12895
    /**
     * Limit the number of words in a string.
     *
     * A "word" is a run of non-whitespace characters. If the string holds more than $limit
     * words, it is cut after the last allowed word, trailing whitespace is removed and
     * $str_add_on is appended. Otherwise the string is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if the input is empty or $limit is smaller than 1.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // optional leading whitespace, then 1..$limit runs of "word + trailing whitespace"
        $matched = \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $matches);

        if (
            // no word found, or the pattern could not be applied (e.g. invalid UTF-8)
            $matched !== 1
            ||
            // the match already covers the whole string, so nothing was cut off
            \mb_strlen($str) === (int) \mb_strlen($matches[0])
        ) {
            return $str;
        }

        // "\s" is Unicode-aware under the "u" modifier, so trailing whitespace is stripped
        // with the same definition (plus NUL, which \rtrim() removed before) instead of
        // the ASCII-only \rtrim(), which left e.g. a no-break space in front of the add-on
        $head = \preg_replace('/[\\s\\x00]+\\z/u', '', $matches[0]);
        if ($head === null) {
            $head = \rtrim($matches[0]);
        }

        return $head . $str_add_on;
    }
12929
12930
    /**
     * Wraps a string to a given number of characters.
     *
     * The wrapping positions are computed by the native \wordwrap() on a single-byte
     * "mask" of the input (one placeholder per character), and are then applied to the
     * real characters.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars holds the real characters (and existing breaks), $mask mirrors it with
        // one byte per entry: '#' = break, ' ' = space, '?' = any other character
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_pos = 0;
        $mask_pos = -1;
        $break_pos = -1;

        while (true) {
            $break_pos = \mb_strpos($mask, '#', $break_pos + 1);
            if ($break_pos === false) {
                break;
            }

            // copy every character in front of the current break position
            for (++$mask_pos; $mask_pos < $break_pos; ++$mask_pos) {
                if (isset($chars[$char_pos])) {
                    $wrapped .= $chars[$char_pos];
                    unset($chars[$char_pos]);
                }
                ++$char_pos;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_pos > $mask_length) {
                    break 2;
                }
            }

            // a break that replaced a space or an existing break consumes that character
            if (
                $chars[$char_pos] === $break
                ||
                $chars[$char_pos] === ' '
            ) {
                unset($chars[$char_pos++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed above is the last line
        return $wrapped . \implode('', $chars);
    }
13025
13026
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that we wrap the per line.
     *
     * Each part is wrapped with self::wordwrap() and the parts are joined again with
     * $delimiter (or with "\n" when no delimiter was given).
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     An empty string means "do not split at all".
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        if ($delimiter === null) {
            $strings = \preg_split('/\\r\\n|\\r|\\n/', $str);
        } elseif ($delimiter === '') {
            // \explode() rejects an empty separator (ValueError on PHP 8, false + warning
            // before that), so the whole input is handled as a single line
            $strings = [$str];
        } else {
            $strings = \explode($delimiter, $str);
        }

        $string_helper_array = [];
        if ($strings !== false) {
            foreach ($strings as $value) {
                $string_helper_array[] = self::wordwrap($value, $width, $break, $cut);
            }
        }

        $final_break = $add_final_break ? $break : '';

        return \implode($delimiter ?? "\n", $string_helper_array) . $final_break;
    }
13079
13080
    /**
     * Returns the list of Unicode White Space characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
13092
13093
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary looking input that is detected as UTF-16 / UTF-32 is rejected
        if ($strict && self::is_binary($str, true)) {
            if (
                self::is_utf16($str, false) !== false
                ||
                self::is_utf32($str, false) !== false
            ) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // number of continuation octets still expected for the current sequence
        $pending = 0;
        // code point assembled so far
        $code_point = 0;
        // total number of octets of the current sequence
        $sequence_length = 1;

        $byte_count = \strlen($str);
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($pending === 0) {
                // Start of a character: either US-ASCII or the first octet of a
                // multi-octet sequence.
                if (($octet & 0x80) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;
                } elseif (($octet & 0xE0) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $code_point = ($octet & 0x1F) << 6;
                    $pending = 1;
                    $sequence_length = 2;
                } elseif (($octet & 0xF0) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $code_point = ($octet & 0x0F) << 12;
                    $pending = 2;
                    $sequence_length = 3;
                } elseif (($octet & 0xF8) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $code_point = ($octet & 0x07) << 18;
                    $pending = 3;
                    $sequence_length = 4;
                } elseif (($octet & 0xFC) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $code_point = ($octet & 0x03) << 24;
                    $pending = 4;
                    $sequence_length = 5;
                } elseif (($octet & 0xFE) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $code_point = ($octet & 1) << 30;
                    $pending = 5;
                    $sequence_length = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            // Inside a multi-octet sequence only a continuation octet (10xxxxxx) is legal,
            // anything else means the sequence is incomplete.
            if (($octet & 0xC0) !== 0x80) {
                return false;
            }

            // Legal continuation: merge its six payload bits into the code point.
            $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);

            --$pending;
            if ($pending !== 0) {
                continue;
            }

            // End of the multi-octet sequence: check for illegal sequences and code points.
            if (
                // From Unicode 3.1, non-shortest form is illegal
                ($sequence_length === 2 && $code_point < 0x0080)
                ||
                ($sequence_length === 3 && $code_point < 0x0800)
                ||
                ($sequence_length === 4 && $code_point < 0x10000)
                ||
                ($sequence_length > 4)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($code_point & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                ($code_point > 0x10FFFF)
            ) {
                return false;
            }

            // reset the cache for the next character
            $code_point = 0;
            $sequence_length = 1;
        }

        // a sequence that is still open at the end of the input is invalid
        return $pending === 0;
    }
13247
13248
    /**
     * Replaces characters via the case-folding tables: first the common table
     * (self::$COMMON_CASE_FOLD), then optionally the full table loaded from the
     * "caseFolding_full" data file.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;

        // pick the direction of the replacement once
        $search_key = $use_lowercase ? 'upper' : 'lower';
        $replace_key = $use_lowercase ? 'lower' : 'upper';

        $str = \str_replace(
            self::$COMMON_CASE_FOLD[$search_key],
            self::$COMMON_CASE_FOLD[$replace_key],
            $str
        );

        if (!$use_full_case_fold) {
            return $str;
        }

        // the full table is loaded on first use only
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        $search_index = $use_lowercase ? 0 : 1;
        $replace_index = 1 - $search_index;

        return \str_replace($FULL_CASE_FOLD[$search_index], $FULL_CASE_FOLD[$replace_index], $str);
    }
13301
13302
    /**
     * Includes "/data/<file>.php" (relative to this file) and returns the value
     * returned by that include.
     *
     * @param string $file <p>The file name without directory and without ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
13320
13321
    /**
     * Fills the static emoji caches on the first call.
     *
     * The emoji table is sorted by key length (longest key first), then the key cache,
     * the value cache and one reversible placeholder per key are built.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                // descending by byte length
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
13358
13359
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>false if the constant MB_OVERLOAD_STRING is not defined or its bit is not set in "mbstring.func_overload".</p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection DeprecatedIniOptionsInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($func_overload & \MB_OVERLOAD_STRING);
    }
13379
13380
    /**
     * Filters an array of strings and returns the remaining values as a re-indexed list.
     *
     * @param array    $strings             <p>The values to filter.</p>
     * @param bool     $remove_empty_values <p>If true, values that are empty after \trim() are dropped.</p>
     * @param int|null $remove_short_values <p>
     *                                      If not null, values with a length (\mb_strlen()) smaller than or
     *                                      equal to this number are dropped.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // iterate by value: nothing is written back, so no reference is needed
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
13421
13422
    /**
     * Builds a regex fragment that matches any of the characters of $s
     * (plus the already prepared class content given in $class).
     *
     * Results are cached per "$s" / "$class" combination.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Content that is put into the character class as it is (not quoted here).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A character class "[...]" or, if needed, a non-capturing alternation "(?:...|...)".</p>
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 collects the character class content, further entries are alternatives
        $class_array = [$class];

        // iterate by value and with its own variable, so the parameter $s stays untouched
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // the hyphen is put in front of everything else in the class
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
13477
13478
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split by $delimiter and the first character of every part is upper-cased
     * via self::ucfirst(), except for parts that
     * - are exactly one of the special-case names (e.g. "van", "de"),
     * - start with one of the special-case prefixes (e.g. "mc", "d'"), or
     * - start with one of the special-case names while the delimiter is "-".
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts.</p>
     * @param string $encoding  <p>Passed through to self::ucfirst().</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // init
        $name_helper_array = \explode($delimiter, $names);
        if ($name_helper_array === false) {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // write back by index instead of by reference, so no reference outlives the loop
        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $skip = false;

            if ($delimiter === '-') {
                foreach ($special_cases['names'] as $beginning) {
                    if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                        $skip = true;

                        break;
                    }
                }
            }

            if (!$skip) {
                foreach ($special_cases['prefixes'] as $beginning) {
                    if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                        $skip = true;

                        break;
                    }
                }
            }

            if ($skip) {
                continue;
            }

            $name_helper_array[$index] = self::ucfirst($name, $encoding);
        }

        return \implode($delimiter, $name_helper_array);
    }
13578
13579
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and every run of non-spacing marks (\p{Mn})
     * is removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>An empty string if the normalization fails, whatever \preg_replace() returns otherwise.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        return $decomposed === false
            ? ''
            : \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
13603
13604
    /**
     * Converts one input byte to UTF-8.
     *
     * Bytes listed in the Windows-1252 table (self::$WIN1252_TO_UTF8) are replaced by
     * their table entry, every other byte is encoded as a two-byte UTF-8 sequence.
     *
     * @param int|string $input <p>The byte to convert; it is used as key of the self::$ORD table.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Lead byte: "\xC0" combined with the bits above the low six. The integer shift
            // replaces the former "/ 64", which produced a fractional float array key
            // (deprecated implicit float to int conversion since PHP 8.1).
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
            // Trail byte: "\x80" combined with the low six bits (bitwise string operators on purpose).
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
13642
13643
    /**
     * Converts "%uXXXX" escapes (3 or 4 hex digits) into hexadecimal HTML
     * entities ("&#xXXXX;").
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input itself if it contains no such escape.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        // cheap pre-check before the regex is used at all
        if (\strpos($str, '%u') === false) {
            return $str;
        }

        $pattern = '/%u([0-9a-fA-F]{3,4})/';

        return \preg_match($pattern, $str)
            ? (string) \preg_replace($pattern, '&#x\\1;', $str)
            : $str;
    }
13665
}
13666