Passed
Push — master ( bd8bdc...dbf068 )
by Lars
18:42 queued 15:49
created

UTF8   F

Complexity

Total Complexity 1756

Size/Duplication

Total Lines 13747
Duplicated Lines 0 %

Test Coverage

Coverage 80.73%

Importance

Changes 109
Bugs 53 Features 5
Metric Value
eloc 4243
dl 0
loc 13747
ccs 3158
cts 3912
cp 0.8073
rs 0.8
c 109
b 53
f 5
wmc 1756

274 Methods

Rating   Name   Duplication   Size   Complexity  
A str_ensure_right() 0 13 4
A str_humanize() 0 15 1
A str_ends_with_any() 0 13 4
A str_ensure_left() 0 11 3
A is_serialized() 0 11 3
A spaces_to_tabs() 0 11 3
A single_chr_html_encode() 0 18 4
C substr_count_in_byte() 0 55 15
A substr_count_simple() 0 31 6
A encode_mimeheader() 0 26 5
F extract_text() 0 175 34
B chr_to_decimal() 0 38 8
A add_bom_to_string() 0 7 2
A ctype_loaded() 0 3 1
D chr() 0 107 19
A chunk_split() 0 3 1
A css_identifier() 0 55 6
A css_stripe_media_queries() 0 6 1
A clean() 0 47 6
A __construct() 0 2 1
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A char_at() 0 7 2
A chars() 0 4 1
A checkForSupport() 0 46 4
A collapse_whitespace() 0 7 2
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A array_change_key_case() 0 23 5
A count_chars() 0 11 1
B between() 0 48 8
A emoji_decode() 0 21 3
A decode_mimeheader() 0 8 3
A emoji_encode() 0 21 3
A decimal_to_chr() 0 5 1
F encode() 0 144 37
A chr_to_hex() 0 11 3
A emoji_from_country_code() 0 17 3
A chr_size_list() 0 17 3
A get_unique_string() 0 21 3
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A replace_all() 0 11 2
A ws() 0 3 1
A normalize_msword() 0 3 1
A replace() 0 11 2
B html_encode() 0 54 11
A file_has_bom() 0 8 2
A filter_input() 0 16 3
A is_bom() 0 10 3
A is_hexadecimal() 0 7 2
A has_uppercase() 0 7 2
A is_utf8() 0 13 4
A html_escape() 0 6 1
B get_file_type() 0 60 7
D is_utf16() 0 76 18
C filter() 0 59 14
A is_html() 0 14 2
A is_alpha() 0 7 2
B get_random_string() 0 54 10
A fix_utf8() 0 30 4
A first_char() 0 14 4
A is_uppercase() 0 7 2
A is_ascii() 0 3 1
A is_blank() 0 7 2
D getCharDirection() 0 104 117
A htmlspecialchars() 0 15 3
A filter_var_array() 0 15 2
A has_whitespace() 0 7 2
B is_binary() 0 39 10
A intlChar_loaded() 0 3 1
A lcfirst() 0 44 5
B is_url() 0 40 7
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
A json_loaded() 0 3 1
A is_lowercase() 0 7 2
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A filter_var() 0 15 2
A is_empty() 0 3 1
D is_utf32() 0 76 18
A is_alphanumeric() 0 7 2
A json_decode() 0 17 3
A fix_simple_utf8() 0 32 5
B is_json() 0 26 8
A is_printable() 0 3 1
A int_to_hex() 0 7 2
A has_lowercase() 0 7 2
A json_encode() 0 13 3
A is_base64() 0 17 5
A hex_to_int() 0 14 3
A hex_to_chr() 0 4 1
A htmlentities() 0 28 3
A filter_input_array() 0 15 3
A getSupportInfo() 0 13 3
A is_punctuation() 0 3 1
C html_entity_decode() 0 58 13
C file_get_contents() 0 60 12
A str_substr_after_first_separator() 0 28 6
A max() 0 14 3
B str_camelize() 0 74 10
A parse_str() 0 18 4
A str_contains() 0 15 3
B str_to_lines() 0 28 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 25 6
A remove_left() 0 28 4
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A max_chr_width() 0 8 2
C utf8_decode() 0 59 13
A ltrim() 0 26 5
A levenshtein() 0 7 1
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 70 14
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A string() 0 18 4
B str_obfuscate() 0 47 8
D normalize_encoding() 0 147 16
B rxClass() 0 44 8
B str_titleize_for_humans() 0 170 7
A normalize_whitespace() 0 11 1
A str_starts_with() 0 16 4
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 26 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 35 5
A getData() 0 6 1
B strtolower() 0 58 10
A urldecode() 0 35 4
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
D substr_replace() 0 123 27
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A str_split_array() 0 17 2
A str_replace_first() 0 20 2
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A to_boolean() 0 35 5
C stristr() 0 79 17
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A trim() 0 26 5
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
A convertMbAscii() 0 19 4
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 12 3
A str_ireplace() 0 31 5
A str_replace_ending() 0 24 6
A string_has_bom() 0 9 3
B strtr() 0 41 11
B str_contains_all() 0 22 9
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 71 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A rawurldecode() 0 35 4
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 14 3
C str_detect_encoding() 0 111 14
A str_istarts_with() 0 11 3
A str_replace() 0 18 1
A substr_iright() 0 15 4
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 134 29
A strrpos_in_byte() 0 12 4
F strrpos() 0 136 31
A remove_right() 0 25 4
A remove_html_breaks() 0 3 1
A showSupport() 0 16 3
A remove_invisible_characters() 0 11 1
A str_replace_last() 0 19 2
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 43 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 16 4
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 12 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
B str_snakeize() 0 57 6
A str_sort() 0 16 3
A to_utf8() 0 15 3
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 51 11
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A str_dasherize() 0 3 1
C ord() 0 68 16
B to_string() 0 27 8
A strtonatfold() 0 11 2
C strcspn() 0 48 12
A fixStrCaseHelper() 0 41 5
C str_split_pattern() 0 54 13
D strstr() 0 107 21
A str_isubstr_first() 0 25 4
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 137 31
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A replace_diamond_question_mark() 0 43 5
D is_utf8_string() 0 133 28
B str_delimit() 0 31 8
A to_utf8_convert_helper() 0 28 5
B strtoupper() 0 58 10
A min() 0 14 3
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 28 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 151 33
A str_shuffle() 0 33 6
A strcmp() 0 11 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
     * Byte order mark => byte length of that mark.
     *
     * Every BOM is listed twice: once as its raw bytes and once as the UTF-8
     * byte sequence that results when those raw bytes were read as
     * "WINDOWS-1252" and then stored as UTF-8 (each byte >= 0x80 becomes two
     * bytes, a NUL byte stays a single byte).
     *
     * All keys are written as byte escapes so that they survive editors and
     * tools that strip or replace non-printable / non-ASCII characters.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
31
32
    /**
33
     * Numeric code point => UTF-8 Character
34
     *
35
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
36
     *
37
     * @var array<int, string>
38
     */
39
    private static $WHITESPACE = [
40
        // NULL Byte
41
        0 => "\x0",
42
        // Tab
43
        9 => "\x9",
44
        // New Line
45
        10 => "\xa",
46
        // Vertical Tab
47
        11 => "\xb",
48
        // Carriage Return
49
        13 => "\xd",
50
        // Ordinary Space
51
        32 => "\x20",
52
        // NO-BREAK SPACE
53
        160 => "\xc2\xa0",
54
        // OGHAM SPACE MARK
55
        5760 => "\xe1\x9a\x80",
56
        // MONGOLIAN VOWEL SEPARATOR
57
        6158 => "\xe1\xa0\x8e",
58
        // EN QUAD
59
        8192 => "\xe2\x80\x80",
60
        // EM QUAD
61
        8193 => "\xe2\x80\x81",
62
        // EN SPACE
63
        8194 => "\xe2\x80\x82",
64
        // EM SPACE
65
        8195 => "\xe2\x80\x83",
66
        // THREE-PER-EM SPACE
67
        8196 => "\xe2\x80\x84",
68
        // FOUR-PER-EM SPACE
69
        8197 => "\xe2\x80\x85",
70
        // SIX-PER-EM SPACE
71
        8198 => "\xe2\x80\x86",
72
        // FIGURE SPACE
73
        8199 => "\xe2\x80\x87",
74
        // PUNCTUATION SPACE
75
        8200 => "\xe2\x80\x88",
76
        // THIN SPACE
77
        8201 => "\xe2\x80\x89",
78
        // HAIR SPACE
79
        8202 => "\xe2\x80\x8a",
80
        // LINE SEPARATOR
81
        8232 => "\xe2\x80\xa8",
82
        // PARAGRAPH SEPARATOR
83
        8233 => "\xe2\x80\xa9",
84
        // NARROW NO-BREAK SPACE
85
        8239 => "\xe2\x80\xaf",
86
        // MEDIUM MATHEMATICAL SPACE
87
        8287 => "\xe2\x81\x9f",
88
        // HALFWIDTH HANGUL FILLER
89
        65440 => "\xef\xbe\xa0",
90
        // IDEOGRAPHIC SPACE
91
        12288 => "\xe3\x80\x80",
92
    ];
93
94
    /**
95
     * @var array<string, string>
96
     */
97
    private static $WHITESPACE_TABLE = [
98
        'SPACE'                     => "\x20",
99
        'NO-BREAK SPACE'            => "\xc2\xa0",
100
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
101
        'EN QUAD'                   => "\xe2\x80\x80",
102
        'EM QUAD'                   => "\xe2\x80\x81",
103
        'EN SPACE'                  => "\xe2\x80\x82",
104
        'EM SPACE'                  => "\xe2\x80\x83",
105
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
106
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
107
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
108
        'FIGURE SPACE'              => "\xe2\x80\x87",
109
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
110
        'THIN SPACE'                => "\xe2\x80\x89",
111
        'HAIR SPACE'                => "\xe2\x80\x8a",
112
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
113
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
114
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
115
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
116
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
117
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
118
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
119
    ];
120
121
    /**
122
     * @var array
123
     *
124
     * @phpstan-var array{upper: string[], lower: string[]}
125
     */
126
    private static $COMMON_CASE_FOLD = [
127
        'upper' => [
128
            'µ',
129
            'ſ',
130
            "\xCD\x85",
131
            'ς',
132
            'ẞ',
133
            "\xCF\x90",
134
            "\xCF\x91",
135
            "\xCF\x95",
136
            "\xCF\x96",
137
            "\xCF\xB0",
138
            "\xCF\xB1",
139
            "\xCF\xB5",
140
            "\xE1\xBA\x9B",
141
            "\xE1\xBE\xBE",
142
        ],
143
        'lower' => [
144
            'μ',
145
            's',
146
            'ι',
147
            'σ',
148
            'ß',
149
            'β',
150
            'θ',
151
            'φ',
152
            'π',
153
            'κ',
154
            'ρ',
155
            'ε',
156
            "\xE1\xB9\xA1",
157
            'ι',
158
        ],
159
    ];
160
161
    /**
162
     * @var array
163
     *
164
     * @phpstan-var array<string, mixed>
165
     */
166
    private static $SUPPORT = [];
167
168
    /**
169
     * @var string[]|null
170
     *
171
     * @phpstan-var array<string, string>|null
172
     */
173
    private static $BROKEN_UTF8_FIX;
174
175
    /**
176
     * @var string[]|null
177
     *
178
     * @phpstan-var array<int, string>|null
179
     */
180
    private static $WIN1252_TO_UTF8;
181
182
    /**
183
     * @var string[]|null
184
     *
185
     * @phpstan-var array<int ,string>|null
186
     */
187
    private static $INTL_TRANSLITERATOR_LIST;
188
189
    /**
190
     * @var string[]|null
191
     *
192
     * @phpstan-var array<string>|null
193
     */
194
    private static $ENCODINGS;
195
196
    /**
197
     * @var int[]|null
198
     *
199
     * @phpstan-var array<string ,int>|null
200
     */
201
    private static $ORD;
202
203
    /**
204
     * @var string[]|null
205
     *
206
     * @phpstan-var array<string, string>|null
207
     */
208
    private static $EMOJI;
209
210
    /**
211
     * @var string[]|null
212
     *
213
     * @phpstan-var array<string>|null
214
     */
215
    private static $EMOJI_VALUES_CACHE;
216
217
    /**
218
     * @var string[]|null
219
     *
220
     * @phpstan-var array<string>|null
221
     */
222
    private static $EMOJI_KEYS_CACHE;
223
224
    /**
225
     * @var string[]|null
226
     *
227
     * @phpstan-var array<string>|null
228
     */
229
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
230
231
    /**
232
     * @var string[]|null
233
     *
234
     * @phpstan-var array<int, string>|null
235
     */
236
    private static $CHR;
237
238
    /**
     * Creates an instance of the helper.
     *
     * The constructor takes no arguments and does no work.
     */
    public function __construct()
    {
        // intentionally empty
    }
244
245
    /**
     * Fetch a single character by position: a multi-byte aware $str[1].
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>The string to read from.</p>
     * @param int    $pos      <p>Character position, counted from 0. A negative value yields an empty string.</p>
     * @param string $encoding [optional] <p>Charset used for the lookup, "UTF-8" by default.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $pos, or an empty string for empty input or a negative position.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // nothing to look up
        if ($pos < 0 || $str === '') {
            return '';
        }

        // "UTF-8" is served directly by mbstring, every other charset goes through self::substr()
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
271
272
    /**
     * Put the UTF-8 BOM in front of a string.
     *
     * INFO: A string for which UTF8::string_has_bom() already reports a BOM is returned untouched.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input string, guaranteed to start with a BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
294
295
    /**
     * Changes the case of all keys in an array (multi-byte aware).
     *
     * The values are copied over unchanged. If two keys become equal after
     * the conversion, the later entry overwrites the earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default). Any other value is
     *                                       treated as <strong>CASE_LOWER</strong>.</p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>
     *                              <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // everything that is not explicitly CASE_UPPER is handled as CASE_LOWER
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // iterate by value: the input is only read, a reference would be left dangling after the loop
        foreach ($array as $key => $value) {
            // PHP stores numeric string keys as integers, the case helpers work on strings
            $key = (string) $key;

            $key = $to_upper
                ? self::strtoupper($key, $encoding)
                : self::strtolower($key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
332
333
    /**
     * Extracts the text that sits between two delimiters.
     *
     * The first occurrence of $start at or after $offset is located, then the
     * first occurrence of $end after it. An empty string is returned when
     * either delimiter is missing or when nothing lies between them.
     *
     * @param string $str      <p>The string to search in.</p>
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The text between the delimiters, or an empty string.</p>
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // the literal default charset is served by mbstring directly,
        // everything else is normalized first and routed through the class helpers
        $use_mb = $encoding === 'UTF-8';
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $from = $use_mb
            ? \mb_strpos($str, $start, $offset)
            : self::strpos($str, $start, $offset, $encoding);
        if ($from === false) {
            return '';
        }

        // jump behind the start delimiter
        $from += $use_mb
            ? (int) \mb_strlen($start)
            : (int) self::strlen($start, $encoding);

        $to = $use_mb
            ? \mb_strpos($str, $end, $from)
            : self::strpos($str, $end, $from, $encoding);
        if ($to === false || $to === $from) {
            return '';
        }

        $length = $to - $from;

        if ($use_mb) {
            return (string) \mb_substr($str, $from, $length);
        }

        return (string) self::substr($str, $from, $length, $encoding);
    }
398
399
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * The digits are converted byte by byte. Converting the whole input in a
     * single base_convert() call loses precision on long inputs, and feeding
     * an odd number of hex digits to pack('H*') shifts the last nibble
     * (e.g. '00001010' became "\xa0" instead of "\n").
     *
     * Characters other than "0" and "1" are ignored, leading zeros are dropped
     * and an input without any "1" results in an empty string.
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        // keep the binary digits only and drop leading zeros
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', (string) $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to whole bytes, the first byte may have lost its leading zeros
        $missing = (8 - \strlen($bits) % 8) % 8;
        if ($missing > 0) {
            $bits = \str_repeat('0', $missing) . $bits;
        }

        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
425
426
    /**
     * Returns the byte order mark in its UTF-8 form.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The three bytes of the UTF-8 BOM.</p>
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
442
443
    /**
     * Alias of UTF8::chr_map(): forwards both arguments unchanged and returns its result.
     *
     * @param callable $callback <p>The callback handed on to UTF8::chr_map().</p>
     * @param string   $str      <p>The string handed on to UTF8::chr_map().</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
459
460
    /**
     * Returns the single character found at $index, counting from 0.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // every charset other than the literal default is handled by self::substr()
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $index, 1, $encoding);
        }

        return (string) \mb_substr($str, $index, 1);
    }
480
481
    /**
     * Splits the string via UTF8::str_split() and returns the resulting list.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $chars */
        $chars = self::str_split($str);

        return $chars;
    }
496
497
    /**
     * Detects once which UTF-8 related extensions and features are available
     * and stores the findings in self::$SUPPORT.
     *
     * When mbstring (or the Symfony polyfill) is reported as available, the
     * internal encoding is switched to "UTF-8" as part of the detection.
     *
     * @return true|null
     *                   <p>true if the detection ran in this call, null if it had already been done.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // the detection runs only once per process
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // mbstring: http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // iconv: http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // intl: http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // IntlChar: http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // ctype: http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // finfo: http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // json: http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // pcre: http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // the polyfill also offers "mb_internal_encoding()"
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
551
552
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are cached per code point and encoding. ASCII code points are
     * read from the byte table, everything else is built via "IntlChar" if
     * available, otherwise the UTF-8 bytes are assembled by hand.
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null on failure or empty input
     *                     (not an integer, not positive or above U+10FFFF).</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // U+10FFFF is the last code point, anything above would index outside of the byte table below
        if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // only ASCII is a single byte in UTF-8: U+0080 already needs two bytes ("\xC2\x80"),
        // the table entry for 0x80 is a lone continuation byte
        if ($code_point < 0x80) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
675
676
    /**
     * Runs a callback on every character of a string and collects the results.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>The callback function, called with one character at a time.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
696
697
    /**
     * Lists how many bytes each character of a Unicode string occupies.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An array of byte lengths of each character, empty for an empty string.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // with "mbstring.func_overload" the plain strlen() can not be trusted to count bytes
        $overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        $chars = self::str_split($str);

        if (!$overloaded) {
            return \array_map('\strlen', $chars);
        }

        return \array_map(
            static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            },
            $chars
        );
    }
732
733
    /**
     * Get a decimal code representation of a specific character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * The conversion is done by "iconv" when it is available and succeeds,
     * otherwise the UTF-8 bytes of the character are decoded by hand.
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * @param string $char <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point of the character.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        if (self::$SUPPORT['iconv'] === true) {
            $ucs4 = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($ucs4 !== false) {
                /** @phpstan-ignore-next-line - "unpack": only false if the format string contains errors */
                return \unpack('V', $ucs4)[1];
            }
        }

        $lead = self::ord($char[0]);

        // 0xxxxxxx: single byte, the byte value is the code point
        if (($lead & 0x80) === 0) {
            return $lead;
        }

        // the lead byte announces the sequence length, its marker bits are stripped
        if (($lead & 0xe0) === 0xc0) {
            // 110xxxxx
            $length = 2;
            $code = $lead & ~0xc0;
        } elseif (($lead & 0xf0) === 0xe0) {
            // 1110xxxx
            $length = 3;
            $code = $lead & ~0xe0;
        } elseif (($lead & 0xf8) === 0xf0) {
            // 11110xxx
            $length = 4;
            $code = $lead & ~0xf0;
        } else {
            // no known lead byte: the value is returned as it is
            $length = 1;
            $code = $lead;
        }

        // 10xxxxxx: every following byte contributes its lower bits
        for ($offset = 1; $offset < $length; ++$offset) {
            $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $code;
    }
785
786
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character; it is cast to a string before the lookup.</p>
     * @param string     $prefix [optional] <p>Prefix that is handed to UTF8::int_to_hex().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the HTML entity of NUL is looked up like an empty character
        $lookup = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($lookup), $prefix);
    }
811
812
    /**
     * Splits a string into smaller chunks and joins them with the specified line ending character(s).
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) that are placed between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        // implode() only puts the separator between the chunks, not after the last one
        return \implode($end, $chunks);
    }
830
831
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
     *                                                        Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
     *                                                        in combination with $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
     *                                                        question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
     *                                                        invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                                        (only used together with $remove_invisible_characters)
     *                                                        </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 encoded string.</p>
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // group 1 captures runs of well-formed sequences, the other two groups
        // match single stray bytes; replacing with "$1" keeps only group 1
        $utf8_filter = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $str = (string) \preg_replace($utf8_filter, '$1', $str);

        // the optional steps run in a fixed order, each one on the result of the previous one
        if ($replace_diamond_question_mark === true) {
            $str = self::replace_diamond_question_mark($str);
        }

        if ($remove_invisible_characters === true) {
            $str = self::remove_invisible_characters($str, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace === true) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword === true) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom === true) {
            $str = self::remove_bom($str);
        }

        return $str;
    }
910
911
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string (it is cast to a string first).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The cleaned string, or an empty string for an empty input.</p>
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // remove all non UTF-8 symbols and invisible characters (e.g. "\0"),
        // which UTF8::clean() does by default, plus the options below
        return self::clean(
            $fixed,
            true,  // $remove_bom
            true,  // $normalize_whitespace
            false, // $normalize_msword
            true,  // $keep_non_breaking_space
            true   // $replace_diamond_question_mark (�)
        );
    }
948
949
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
     *                                     default, code points will be returned as integers.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        An empty array for an empty or unsupported input.
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // a string is split into its characters, an array is used as given
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $chars);
        if (!$use_u_style) {
            return $code_points;
        }

        // array_map() passes one argument only, so the default prefix of int_to_hex() applies
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1010
1011
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        $whitespace_run = '[[:space:]]+';

        if (self::$SUPPORT['mbstring'] === true) {
            $collapsed = (string) \mb_ereg_replace($whitespace_run, ' ', $str);
        } else {
            // no mbstring: use the regex helper of this class
            $collapsed = self::regex_replace($str, $whitespace_run, ' ');
        }

        return \trim($collapsed);
    }
1031
1032
    /**
     * Returns count of characters used in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                        "mb_" functions (the flag is passed on to UTF8::str_split()).</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // one array entry per character
        $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($chars);
    }
1061
1062
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter     <p>Map of "search => replacement" strings that is applied to the identifier.</p>
     * @param bool     $strip_tags <p>Set to true, to run strip_tags() on the identifier.</p>
     * @param bool     $strtolower <p>Set to false, to skip the strtolower() call.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @phpstan-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback ...
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $placeholder_count = 0;
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $placeholder_count);
        }

        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Replace temporary placeholder '##' with '__' only if the original
        // identifier contained '__'.
        if ($placeholder_count > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Valid characters in a CSS identifier are:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 and higher
        // We strip out any character not in the above list.
        $invalid_chars = '/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u';
        $str = (string) \preg_replace($invalid_chars, '', $str);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $invalid_starts = ['/^[0-9]/', '/^(-[0-9])|^(--)/'];
        $str = (string) \preg_replace($invalid_starts, ['_', '__'], $str);

        return \trim($str, '-');
    }
1136
1137
    /**
     * Remove css media-queries.
     *
     * @param string $str <p>The CSS source.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The CSS without the matched "@media" blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // matches "@media ... { ... } }", the "U" modifier makes all quantifiers ungreedy
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($media_query_pattern, '', $str);

        return (string) $stripped;
    }
1154
1155
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1169
1170
    /**
     * Converts an int value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_decimal()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int <p>The decimal code point.</p>
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        // the code point is written as a numeric HTML entity and then decoded
        $entity = '&#' . $int . ';';

        // We cannot use html_entity_decode() here, as it will not return
        // characters for many values < 160.
        // NOTE(review): the "HTML-ENTITIES" encoding of mbstring is reported as
        // deprecated in newer PHP versions - confirm before upgrading.
        return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
    }
1191
1192
    /**
     * Decodes a MIME header field.
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are used as given, every other name is normalized first
        $needs_normalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        $decoded = \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);

        return $decoded;
    }
1213
1214
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE (the case of the letters does not matter)</p>
     *
     * @return string
     *                <p>Emoji or empty string on error, e.g. if the input
     *                does not consist of exactly two ASCII letters.</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only two ASCII letters can be mapped onto the regional indicator range.
        // This check also rejects the empty string, wrong lengths and multi-byte
        // characters, which must not be indexed byte-wise below.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code = \strtoupper($country_code_iso_3166_1);

        $flagOffset = 0x1F1E6; // REGIONAL INDICATOR SYMBOL LETTER A
        $asciiOffset = 0x41; // "A"

        // every letter is shifted from "A".."Z" into the regional indicator range
        return (self::chr((self::ord($country_code[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1242
1243
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // pick the key list that was used for the encoding, the values are the same for both
        $search = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace($search, (array) self::$EMOJI_VALUES_CACHE, $str);
    }
1286
1287
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // the emoji values are always searched, only the replacement key list differs
        $replacements = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace((array) self::$EMOJI_VALUES_CACHE, $replacements, $str);
    }
1330
1331
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the current string-encoding is
     *                                              auto-detected and a detected encoding replaces
     *                                              $from_encoding; if nothing can be detected, the string
     *                                              is converted via UTF8::to_utf8().</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException <p>If the target is "JSON" and the string can not be json-encoded.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // nothing to convert
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // "UTF-8" and "CP850" are used as given, every other name is normalized first
        if (!\in_array($to_encoding, ['UTF-8', 'CP850'], true)) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && !\in_array($from_encoding, ['UTF-8', 'CP850'], true)) {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // same (non-empty) source and target: the string is returned untouched
        if ($from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // The pseudo encodings are handled pairwise and in this exact order:
        // a pseudo target returns immediately, a pseudo source is decoded and
        // the real source encoding is detected afterwards.
        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            // NOTE(review): the decoded value is used as a string below - confirm what
            // json_decode() returns for non-string JSON documents.
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // NOTE(review): strict mode returns false for invalid input, which is not checked here.
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // detect the source encoding, if requested or if it is (still) unknown
        $detected_encoding = ($auto_detect_the_from_encoding || !$from_encoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding !== false) {
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // dedicated helpers for the conversions between UTF-8, ISO-8859-1 and WINDOWS-1252
        $from_is_windows_1252 = $from_encoding === 'WINDOWS-1252';

        if (
            $to_encoding === 'UTF-8'
            &&
            ($from_is_windows_1252 || $from_encoding === 'ISO-8859-1')
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            ($from_is_windows_1252 || $from_encoding === 'UTF-8')
        ) {
            return self::to_iso8859($str);
        }

        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $str_encoded = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // an empty / falsy result falls through to iconv()
            if ($str_encoded) {
                \assert(\is_string($str_encoded));

                return $str_encoded;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $iconv_result = @\iconv($from_encoding, $to_encoding, $str);

        // last resort: hand back the unconverted string
        return $iconv_result !== false ? $iconv_result : $str;
    }
1506
1507
    /**
     * Encodes a string as MIME header field value via "iconv_mime_encode()" (with an empty field name).
     *
     * @param string $str               <p>The string to encode.</p>
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding (used as "scheme").</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
     * @param int    $indent            [optional] <p>Set the max line length (used as "line-length").</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as given, every other name is normalized first
        if (!($from_charset === 'UTF-8' || $from_charset === 'CP850')) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!($to_charset === 'UTF-8' || $to_charset === 'CP850')) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $preferences);
    }
1550
1551
    /**
     * Create an extract from a sentence; if the search-string is found, the extract is built around it.
     *
     * The text is only ever cut at a space or a period. Skipped text at the start and/or the end
     * of the extract is replaced by $replacer_for_skipped_text.
     *
     * EXAMPLE: <code>UTF8::extract_text('this is only a Fork of Utf8 Class', '', 10); // 'this is only…'</code>
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // Position of the first space or period at/after $offset; 0 if there is none.
        //
        // Only positions that were really found are compared: "min(12, false)" is false,
        // so a plain min() over both lookups loses the space whenever the text has no period.
        $next_boundary = static function (int $offset) use ($str, $encoding, $is_utf8): int {
            $candidates = [];
            foreach ([' ', '.'] as $boundary_char) {
                $found = $is_utf8
                    ? \mb_strpos($str, $boundary_char, $offset)
                    : self::strpos($str, $boundary_char, $offset, $encoding);

                if ($found !== false) {
                    $candidates[] = (int) $found;
                }
            }

            return $candidates === [] ? 0 : \min($candidates);
        };

        // Encoding-aware substring of $str (may be false for the non UTF-8 helper).
        $slice = static function (int $start, int $chars) use ($str, $encoding, $is_utf8) {
            return $is_utf8
                ? \mb_substr($str, $start, $chars)
                : self::substr($str, $start, $chars, $encoding);
        };

        // -- no search-string: keep the beginning of the text up to the next boundary

        if ($search === '') {
            if ($length > 0) {
                $string_length = $is_utf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
                $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
            } else {
                $end = 0;
            }

            $pos = $next_boundary($end);
            if (!$pos) {
                return $str;
            }

            $str_sub = $slice(0, $pos);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // -- with search-string: try to center the extract around the first (case-insensitive) hit

        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // start the extract at the last boundary in front of the centered window
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $slice(0, $half_side);
            if ($half_text !== false) {
                // max() is safe here: a "false" (not found) never wins against a found position
                if ($is_utf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($word_position && $half_side > 0) {
            $offset = $pos_start + $length - 1;
            $real_length = (int) self::strlen($str, $encoding);

            if ($offset > $real_length) {
                $offset = $real_length;
            }

            // length of the extract, counted from $pos_start
            $pos_end = $next_boundary($offset) - $pos_start;

            if ($pos_end <= 0) {
                // no boundary behind the window: take everything up to the end of the text
                if ($is_utf8) {
                    $str_sub = \mb_substr($str, $pos_start, (int) \mb_strlen($str));
                } else {
                    $str_sub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
                }

                if ($str_sub === false) {
                    return '';
                }

                return $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
            }

            $str_sub = $slice($pos_start, $pos_end);
            if ($str_sub === false) {
                return '';
            }

            return $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // -- search-string not found (or found too close to the start): cut from the beginning

        $offset = $length - 1;
        $true_length = (int) self::strlen($str, $encoding);

        if ($offset > $true_length) {
            $offset = $true_length;
        }

        $pos_end = $next_boundary($offset);
        if (!$pos_end) {
            return $str;
        }

        $str_sub = $slice(0, $pos_end);
        if ($str_sub === false) {
            return '';
        }

        return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }
1740
1741
    /**
     * Reads entire file into a string.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed through to "file_get_contents()" to trigger
     *                                        the include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If it is null and $timeout is
     *                                        non-zero, a context with that "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached. Negative values are handled as 0.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
        $filename = Bootup::filter_sanitize_string_polyfill($filename);
        if ($filename === false) {
            return false;
        }

        // only build our own context if the caller did not provide one
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        if ($max_length === null) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        } else {
            // negative lengths are clamped to 0
            $read_length = $max_length < 0 ? 0 : $max_length;
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $read_length);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // convert text only: non-binary data, or binary-looking data that is UTF-16 / UTF-32
        $is_convertible = !self::is_binary($data, true)
                          ||
                          self::is_utf16($data, false) !== false
                          ||
                          self::is_utf32($data, false) !== false;

        if ($is_convertible) {
            $data = self::encode('UTF-8', $data, false, $from_encoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1845
1846
    /**
     * Checks if a file starts with BOM (Byte Order Mark) character.
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        // the whole file is read, the BOM check itself is done on the string
        $raw_content = \file_get_contents($file_path);

        if ($raw_content !== false) {
            return self::string_has_bom($raw_content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1869
1870
    /**
     * Normalizes strings to the given Unicode normalization form (NFC by default).
     *
     * Arrays and objects are processed recursively, all other non-string values are returned untouched.
     * For strings: "\r" line endings are normalized first; non-ASCII input that is not yet normalized
     * is normalized, and if that yields no result it is passed through "UTF8::encode('UTF-8', ...)".
     * A leading combining mark is prefixed with $leading_combining.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var
     * @param int                 $normalization_form
     * @param string              $leading_combining
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        if (\is_array($var) || \is_object($var)) {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);
        } elseif (\is_string($var)) {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            if (!ASCII::is_ascii($var)) {
                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // marker: nothing to do, but keep the value "truthy" for the check below
                    $n = '-';
                } else {
                    $n = \Normalizer::normalize($var, $normalization_form);

                    $var = ($n && isset($n[0]))
                        ? $n
                        : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $starts_with_combining_mark = $n
                                              &&
                                              $var[0] >= "\x80"
                                              &&
                                              isset($n[0], $leading_combining[0])
                                              &&
                                              \preg_match('/^\\p{Mn}/u', $var);

                if ($starts_with_combining_mark) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1947
1948
    /**
     * "filter_input()"-wrapper: the result of "filter_input()" is passed through "UTF8::filter()".
     *
     * Gets a specific external variable by name and optionally filters it.
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW)); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      A null value is not forwarded to "filter_input()".
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_options = $options !== null && \func_num_args() >= 4;

        $var = $has_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($var);
    }
2003
2004
    /**
     * "filter_input_array()"-wrapper: the result of "filter_input_array()" is passed through "UTF8::filter()".
     *
     * Gets external variables and optionally filters them.
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => 'FILTER_UNSAFE_RAW')); // array('bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int        $type       <p>
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                               <b>INPUT_ENV</b>.
     *                               </p>
     * @param array|null $definition [optional] <p>
     *                               An array defining the arguments. A valid key is a string
     *                               containing a variable name and a valid value is either a filter type, or an array
     *                               optionally specifying the filter, flags and options. If the value is an
     *                               array, valid keys are filter which specifies the
     *                               filter type,
     *                               flags which specifies any flags that apply to the
     *                               filter, and options which specifies any options that
     *                               apply to the filter.
     *                               </p>
     *                               <p>
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
     *                               input array are filtered by this filter.
     *                               </p>
     *                               <p>
     *                               A null value is not forwarded to "filter_input_array()".
     *                               </p>
     * @param bool       $add_empty  [optional] <p>
     *                               Add missing keys as <b>NULL</b> to the return value.
     *                               </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_definition = $definition !== null && \func_num_args() >= 2;

        $a = $has_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($a);
    }
2065
2066
    /**
     * "filter_var()"-wrapper: the result of "filter_var()" is passed through "UTF8::filter()".
     *
     * Filters a variable with a specified filter.
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        A null value (explicit or by default) is not forwarded to
     *                                        "filter_var()", so the native default is used.
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                        'options' => array(
     *                                        'default' => 3, // value to return if the filter fails
     *                                        // other options here
     *                                        'min_range' => 0
     *                                        ),
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                        // Expected format: Surname, GivenNames
     *                                        if (strpos($value, ", ") === false) return false;
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
     *                                        $empty = (empty($surname) || empty($givennames));
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                        if ($empty || $notstrings) {
     *                                        return false;
     *                                        } else {
     *                                        return $value;
     *                                        }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            // "filter_var()" only accepts array|int as options, so a null must never be passed on
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2145
2146
    /**
     * "filter_var_array()"-wrapper: the result of "filter_var_array()" is passed through "UTF8::filter()".
     *
     * Gets multiple variables and optionally filters them.
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'jane@example.com'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'jane@example.com']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<mixed>   $data       <p>
     *                                   An array with string keys containing the data to filter.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a
     *                                   filter type, or an
     *                                   array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are filter
     *                                   which specifies the filter type,
     *                                   flags which specifies any flags that apply to the
     *                                   filter, and options which specifies any options that
     *                                   apply to the filter.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all values
     *                                   in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   A null value (explicit or by default) is not forwarded to
     *                                   "filter_var_array()", so the native default is used.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set.
     *               </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition === null || \func_num_args() < 2) {
            // "filter_var_array()" only accepts array|int as definition, so a null must never be passed on
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        return self::filter($a);
    }
2216
2217
    /**
     * Tells whether the "finfo" class exists on this server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the class is available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function finfo_loaded(): bool
    {
        $finfo_available = \class_exists('finfo');

        return $finfo_available;
    }
2231
2232
    /**
     * Get the leading $n characters of a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the beginning.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The leading characters, or an empty string if the input is empty or $n is not positive.</p>
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to extract from an empty string or for a non-positive count
        if ($n <= 0 || $str === '') {
            return '';
        }

        // "UTF-8" goes straight to mbstring, everything else via "self::substr()"
        $prefix = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $prefix;
    }
2258
2259
    /**
     * Check that the length of the string (as reported by "UTF8::strlen()") does not exceed the given size.
     *
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 4); // false</code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the maximum allowed size in number of chars
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if the string length is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $char_count = (int) self::strlen($str);

        return !($char_count > $box_size);
    }
2276
2277
    /**
     * Repair simple cases of broken UTF-8 via a plain search-and-replace table.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input with every key of the "utf8_fix" data replaced by its value.</p>
     */
    public static function fix_simple_utf8(string $str): string
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $SEARCH_CACHE = null;

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $REPLACE_CACHE = null;

        if ($str === '') {
            return '';
        }

        // fill both lookup lists on the first non-empty call only
        if ($SEARCH_CACHE === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
            $SEARCH_CACHE = \array_keys($REPLACE_CACHE ?: []);
        }

        \assert(\is_array($REPLACE_CACHE));

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
2327
2328
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * The string is decoded and re-encoded again and again until a pass no longer changes it.
     * Arrays are processed element by element (recursively), the keys are kept.
     *
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>Will return the fixed input-"array" or
     *                         the fixed input-"string".</p>
     *
     * @template TFixUtf8
     * @phpstan-param TFixUtf8 $str
     * @phpstan-return TFixUtf8
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // repeat the decode / encode round-trip until the result is stable
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::utf8_decode($current, true);
            $current = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2376
2377
    /**
     * Get the writing direction of a specific character.
     *
     * If "IntlChar" support is flagged as available, its direction class is used first;
     * when that class is in neither of the two lists below (or "IntlChar" is not available),
     * a built-in table of right-to-left code point ranges decides.
     *
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * @param string $char
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            $intl_direction = \IntlChar::charDirection($char);

            // direction values from the "IntlChar"-Class
            if (\in_array($intl_direction, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }
        }

        // NOTE(review): assumes "chr_to_decimal()" returns an int code point - confirm
        $code_point = static::chr_to_decimal($char);

        // everything outside of this window is treated as left-to-right
        if ($code_point < 0x5be || $code_point > 0x10b7f) {
            return 'LTR';
        }

        // inclusive [first, last] code point ranges which are right-to-left
        $rtl_ranges = [
            [0x5be, 0x5be],
            [0x5c0, 0x5c0],
            [0x5c3, 0x5c3],
            [0x5c6, 0x5c6],
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x608, 0x608],
            [0x60b, 0x60b],
            [0x60d, 0x60d],
            [0x61b, 0x61b],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x710, 0x710],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7b1, 0x7b1],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x7fa, 0x7fa],
            [0x800, 0x815],
            [0x81a, 0x81a],
            [0x824, 0x824],
            [0x828, 0x828],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0x85e, 0x85e],
            [0x200f, 0x200f],
            [0xfb1d, 0xfb1d],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb3e, 0xfb3e],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x10808, 0x10808],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083c, 0x1083c],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x1093f, 0x1093f],
            [0x10a00, 0x10a00],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            // upper bound is given by the window check above
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            if ($code_point >= $range[0] && $code_point <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2494
2495
    /**
     * Read the php-support information of this class.
     *
     * When a key is requested, the transliterator list is loaded on demand and stored
     * in the support-"array" as "intl__transliterator_list_ids" before the lookup.
     *
     * @param string|null $key
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return the stored value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        return self::$SUPPORT;
    }
2521
2522
    /**
     * Guess the file-type from the first two bytes (magic number) of the given data.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The (binary) content to inspect.</p>
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with this keys: 'ext', 'mime', 'type'; the $fallback is returned
     *                         for input shorter than two bytes and for unknown magic numbers</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // at least two bytes are needed to read the magic number
        if (!isset($str[1])) {
            return $fallback;
        }

        $magic_number = \substr($str, 0, 2);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // e.g. 'pdf', 'exe', 'midi', 'zip', 'rar', 'gif', 'tiff', 'bmp', ...
        //

        // bytes 255 + 216
        if ($magic_number === "\xFF\xD8") {
            return [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ];
        }

        // bytes 137 + 80
        if ($magic_number === "\x89\x50") {
            return [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ];
        }

        return $fallback;
    }
2598
2599
    /**
     * Build a string of $length characters, each picked at random from $possible_chars.
     *
     * The index of every character comes from "random_int()"; "mt_rand()" is only used
     * if "random_int()" throws an exception.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The random string, or an empty string if $possible_chars is empty or $length is not positive.</p>
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // "UTF-8" is handled directly via mbstring, other encodings via the class helpers
        $use_mbstring = $encoding === 'UTF-8';

        if ($use_mbstring) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        if ($pool_size === 0) {
            return '';
        }

        $random_string = '';
        $picked = 0;
        $last_index = $pool_size - 1;

        while ($picked < $length) {
            try {
                $index = \random_int(0, $last_index);
            } catch (\Exception $e) {
                $index = \mt_rand(0, $last_index);
            }

            $char = $use_mbstring
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // a failed extraction does not count, another index is drawn
            if ($char !== false) {
                $random_string .= $char;
                ++$picked;
            }
        }

        return $random_string;
    }
2661
2662
    /**
     * Create an identifier from a random number, the session id, the remote / server
     * address (if set in $_SERVER), the extra entropy and "uniqid()".
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     *                <p>The md5-hash (32 hex chars) if $use_md5 is true, otherwise the raw "uniqid()" result.</p>
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_part = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            $random_part = \mt_rand(0, \mt_getrandmax());
        }

        $remote_addr = $_SERVER['REMOTE_ADDR'] ?? '';
        $server_addr = $_SERVER['SERVER_ADDR'] ?? '';

        // the seed is used as prefix for "uniqid()" and once more as salt for the hash
        $seed = $random_part . \session_id() . $remote_addr . $server_addr . $extra_entropy;

        $unique = \uniqid($seed, true);

        return $use_md5 ? \md5($unique . $seed) : $unique;
    }
2690
2691
    /**
     * Check whether the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // prefer the mbstring regex engine, otherwise use the class helper
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
2709
2710
    /**
     * Check whether the string contains at least one whitespace character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        $pattern = '.*[[:space:]]';

        // prefer the mbstring regex engine, otherwise use the class helper
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
2728
2729
    /**
     * Check whether the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // prefer the mbstring regex engine, otherwise use the class helper
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
2747
2748
    /**
     * Converts a hexadecimal value into a character via "UTF8::decimal_to_chr()".
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // the notice / deprecation of "hexdec()" about non-hexadecimal characters
        // (e.g. the "U+" prefix) is silenced on purpose
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2766
2767
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted formats: 4 to 6 hexadecimal digits, optionally prefixed with "\u" or "U+".
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure (empty input or not a valid representation).</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hexadecimal digits are allowed: other letters would be silently
        // truncated by "intval()" (e.g. "abcz" => 0xabc, "zzzz" => 0) instead of
        // being reported as a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2796
2797
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with "keep ascii" only code points >= 0x80 are converted
            $start_code = $keep_ascii_chars ? 0x80 : 0x00;

            // The encoding is always passed explicitly (also for "UTF-8"), so that
            // the result does not depend on the global "mb_internal_encoding()".
            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity(
                $str,
                [$start_code, 0xfffff, 0, 0xfffff],
                $encoding
            );
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2869
2870
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so numeric entities without a semicolon get one appended
     * here first.
     *
     * Convert all HTML entities to their applicable characters. The decoding is
     * repeated until the string does not change anymore, so multiple encoded
     * entities (e.g. "&amp;lt;") are decoded completely.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string   $str      <p>
     *                           The input string.
     *                           </p>
     * @param int|null $flags    [optional] <p>
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
     *                           and which document type to use. If NULL is given, ENT_QUOTES | ENT_HTML5 is used.
     *                           <table>
     *                           Available <i>flags</i> constants
     *                           <tr valign="top">
     *                           <td>Constant Name</td>
     *                           <td>Description</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_COMPAT</b></td>
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_QUOTES</b></td>
     *                           <td>Will convert both double and single quotes.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_NOQUOTES</b></td>
     *                           <td>Will leave both double and single quotes unconverted.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_HTML401</b></td>
     *                           <td>Handle code as HTML 4.01.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_XML1</b></td>
     *                           <td>Handle code as XML 1.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_XHTML</b></td>
     *                           <td>Handle code as XHTML.</td>
     *                           </tr>
     *                           <tr valign="top">
     *                           <td><b>ENT_HTML5</b></td>
     *                           <td>Handle code as HTML 5.</td>
     *                           </tr>
     *                           </table>
     *                           </p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // shortcut: strings with less than 4 bytes (examples: &; || &x;) or without "&"
        $too_short = !isset($str[3]);
        if ($too_short || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $flags = $flags ?? (\ENT_QUOTES | \ENT_HTML5);

        $is_common_encoding = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
        if (!$is_common_encoding && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        do {
            $before_pass = $str;

            // without any "&" left there is nothing more to decode
            if (\strpos($str, '&') === false) {
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities
                // (append the missing ";" so that the native function accepts them)
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($before_pass !== $str);

        return $str;
    }
3002
3003
    /**
     * Escape a string for html output via "UTF8::htmlspecialchars()",
     * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3021
3022
    /**
3023
     * Remove empty html-tag.
3024
     *
3025
     * e.g.: <pre><tag></tag></pre>
3026
     *
3027
     * @param string $str
3028
     *
3029
     * @psalm-pure
3030
     *
3031
     * @return string
3032
     */
3033 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // An opening tag (no "/" inside), optional whitespace, then a closing tag.
        $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $without_empty_tags = \preg_replace($empty_tag_pattern, '', $str);

        // preg_replace() yields null on failure; the cast turns that into ''.
        return (string) $without_empty_tags;
    }
3041
3042
    /**
3043
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3044
     *
3045
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3046
     *
3047
     * @see http://php.net/manual/en/function.htmlentities.php
3048
     *
3049
     * @param string $str           <p>
3050
     *                              The input string.
3051
     *                              </p>
3052
     * @param int    $flags         [optional] <p>
3053
     *                              A bitmask of one or more of the following flags, which specify how to handle
3054
     *                              quotes, invalid code unit sequences and the used document type. The default is
3055
     *                              ENT_COMPAT | ENT_HTML401.
3056
     *                              <table>
3057
     *                              Available <i>flags</i> constants
3058
     *                              <tr valign="top">
3059
     *                              <td>Constant Name</td>
3060
     *                              <td>Description</td>
3061
     *                              </tr>
3062
     *                              <tr valign="top">
3063
     *                              <td><b>ENT_COMPAT</b></td>
3064
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3065
     *                              </tr>
3066
     *                              <tr valign="top">
3067
     *                              <td><b>ENT_QUOTES</b></td>
3068
     *                              <td>Will convert both double and single quotes.</td>
3069
     *                              </tr>
3070
     *                              <tr valign="top">
3071
     *                              <td><b>ENT_NOQUOTES</b></td>
3072
     *                              <td>Will leave both double and single quotes unconverted.</td>
3073
     *                              </tr>
3074
     *                              <tr valign="top">
3075
     *                              <td><b>ENT_IGNORE</b></td>
3076
     *                              <td>
3077
     *                              Silently discard invalid code unit sequences instead of returning
3078
     *                              an empty string. Using this flag is discouraged as it
3079
     *                              may have security implications.
3080
     *                              </td>
3081
     *                              </tr>
3082
     *                              <tr valign="top">
3083
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3084
     *                              <td>
3085
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3086
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3087
     *                              string.
3088
     *                              </td>
3089
     *                              </tr>
3090
     *                              <tr valign="top">
3091
     *                              <td><b>ENT_DISALLOWED</b></td>
3092
     *                              <td>
3093
     *                              Replace invalid code points for the given document type with a
3094
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3095
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3096
     *                              instance, to ensure the well-formedness of XML documents with
3097
     *                              embedded external content.
3098
     *                              </td>
3099
     *                              </tr>
3100
     *                              <tr valign="top">
3101
     *                              <td><b>ENT_HTML401</b></td>
3102
     *                              <td>
3103
     *                              Handle code as HTML 4.01.
3104
     *                              </td>
3105
     *                              </tr>
3106
     *                              <tr valign="top">
3107
     *                              <td><b>ENT_XML1</b></td>
3108
     *                              <td>
3109
     *                              Handle code as XML 1.
3110
     *                              </td>
3111
     *                              </tr>
3112
     *                              <tr valign="top">
3113
     *                              <td><b>ENT_XHTML</b></td>
3114
     *                              <td>
3115
     *                              Handle code as XHTML.
3116
     *                              </td>
3117
     *                              </tr>
3118
     *                              <tr valign="top">
3119
     *                              <td><b>ENT_HTML5</b></td>
3120
     *                              <td>
3121
     *                              Handle code as HTML 5.
3122
     *                              </td>
3123
     *                              </tr>
3124
     *                              </table>
3125
     *                              </p>
3126
     * @param string $encoding      [optional] <p>
3127
     *                              Like <b>htmlspecialchars</b>,
3128
     *                              <b>htmlentities</b> takes an optional third argument
3129
     *                              <i>encoding</i> which defines encoding used in
3130
     *                              conversion.
3131
     *                              Although this argument is technically optional, you are highly
3132
     *                              encouraged to specify the correct value for your code.
3133
     *                              </p>
3134
     * @param bool   $double_encode [optional] <p>
3135
     *                              When <i>double_encode</i> is turned off PHP will not
3136
     *                              encode existing html entities. The default is to convert everything.
3137
     *                              </p>
3138
     *
3139
     * @psalm-pure
3140
     *
3141
     * @return string
3142
     *                <p>
3143
     *                The encoded string.
3144
     *                <br><br>
3145
     *                If the input <i>string</i> contains an invalid code unit
3146
     *                sequence within the given <i>encoding</i> an empty string
3147
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3148
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3149
     *                </p>
3150
     */
3151 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; every other name is normalized first.
        $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        // Native htmlentities() leaves backslashes untouched, so they are replaced
        // with their numeric entity here.
        // See https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
3180
3181
    /**
3182
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3183
     *
3184
     * INFO: Take a look at "UTF8::htmlentities()"
3185
     *
3186
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3187
     *
3188
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3189
     *
3190
     * @param string $str           <p>
3191
     *                              The string being converted.
3192
     *                              </p>
3193
     * @param int    $flags         [optional] <p>
3194
     *                              A bitmask of one or more of the following flags, which specify how to handle
3195
     *                              quotes, invalid code unit sequences and the used document type. The default is
3196
     *                              ENT_COMPAT | ENT_HTML401.
3197
     *                              <table>
3198
     *                              Available <i>flags</i> constants
3199
     *                              <tr valign="top">
3200
     *                              <td>Constant Name</td>
3201
     *                              <td>Description</td>
3202
     *                              </tr>
3203
     *                              <tr valign="top">
3204
     *                              <td><b>ENT_COMPAT</b></td>
3205
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3206
     *                              </tr>
3207
     *                              <tr valign="top">
3208
     *                              <td><b>ENT_QUOTES</b></td>
3209
     *                              <td>Will convert both double and single quotes.</td>
3210
     *                              </tr>
3211
     *                              <tr valign="top">
3212
     *                              <td><b>ENT_NOQUOTES</b></td>
3213
     *                              <td>Will leave both double and single quotes unconverted.</td>
3214
     *                              </tr>
3215
     *                              <tr valign="top">
3216
     *                              <td><b>ENT_IGNORE</b></td>
3217
     *                              <td>
3218
     *                              Silently discard invalid code unit sequences instead of returning
3219
     *                              an empty string. Using this flag is discouraged as it
3220
     *                              may have security implications.
3221
     *                              </td>
3222
     *                              </tr>
3223
     *                              <tr valign="top">
3224
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3225
     *                              <td>
3226
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3227
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3228
     *                              string.
3229
     *                              </td>
3230
     *                              </tr>
3231
     *                              <tr valign="top">
3232
     *                              <td><b>ENT_DISALLOWED</b></td>
3233
     *                              <td>
3234
     *                              Replace invalid code points for the given document type with a
3235
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3236
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3237
     *                              instance, to ensure the well-formedness of XML documents with
3238
     *                              embedded external content.
3239
     *                              </td>
3240
     *                              </tr>
3241
     *                              <tr valign="top">
3242
     *                              <td><b>ENT_HTML401</b></td>
3243
     *                              <td>
3244
     *                              Handle code as HTML 4.01.
3245
     *                              </td>
3246
     *                              </tr>
3247
     *                              <tr valign="top">
3248
     *                              <td><b>ENT_XML1</b></td>
3249
     *                              <td>
3250
     *                              Handle code as XML 1.
3251
     *                              </td>
3252
     *                              </tr>
3253
     *                              <tr valign="top">
3254
     *                              <td><b>ENT_XHTML</b></td>
3255
     *                              <td>
3256
     *                              Handle code as XHTML.
3257
     *                              </td>
3258
     *                              </tr>
3259
     *                              <tr valign="top">
3260
     *                              <td><b>ENT_HTML5</b></td>
3261
     *                              <td>
3262
     *                              Handle code as HTML 5.
3263
     *                              </td>
3264
     *                              </tr>
3265
     *                              </table>
3266
     *                              </p>
3267
     * @param string $encoding      [optional] <p>
3268
     *                              Defines encoding used in conversion.
3269
     *                              </p>
3270
     *                              <p>
3271
     *                              For the purposes of this function, the encodings
3272
     *                              ISO-8859-1, ISO-8859-15,
3273
     *                              UTF-8, cp866,
3274
     *                              cp1251, cp1252, and
3275
     *                              KOI8-R are effectively equivalent, provided the
3276
     *                              <i>string</i> itself is valid for the encoding, as
3277
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3278
     *                              the same positions in all of these encodings.
3279
     *                              </p>
3280
     * @param bool   $double_encode [optional] <p>
3281
     *                              When <i>double_encode</i> is turned off PHP will not
3282
     *                              encode existing html entities, the default is to convert everything.
3283
     *                              </p>
3284
     *
3285
     * @psalm-pure
3286
     *
3287
     * @return string the converted string.
3288
     *                </p>
3289
     *                <p>
3290
     *                If the input <i>string</i> contains an invalid code unit
3291
     *                sequence within the given <i>encoding</i> an empty string
3292
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3293
     *                <b>ENT_SUBSTITUTE</b> flags are set
3294
     */
3295 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // Only encodings other than the two pass-through names get normalized.
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
3312
3313
    /**
3314
     * Checks whether iconv is available on the server.
3315
     *
3316
     * @psalm-pure
3317
     *
3318
     * @return bool
3319
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3320
     *
3321
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3322
     */
3323
    public static function iconv_loaded(): bool
    {
        // Ask the runtime whether the "iconv" extension is loaded.
        $has_iconv = \extension_loaded('iconv');

        return $has_iconv;
    }
3327
3328
    /**
3329
     * Converts Integer to hexadecimal U+xxxx code point representation.
3330
     *
3331
     * INFO: opposite to UTF8::hex_to_int()
3332
     *
3333
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3334
     *
3335
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3336
     * @param string $prefix [optional]
3337
     *
3338
     * @psalm-pure
3339
     *
3340
     * @return string the code point, or empty string on failure
3341
     */
3342 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Left-pad with zeros up to four hex digits; longer values are kept as they are.
        $hex_digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $hex_digits;
    }
3350
3351
    /**
3352
     * Checks whether intl-char is available on the server.
3353
     *
3354
     * @psalm-pure
3355
     *
3356
     * @return bool
3357
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3358
     *
3359
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3360
     */
3361
    public static function intlChar_loaded(): bool
    {
        // Availability is detected through the presence of the IntlChar class.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
3365
3366
    /**
3367
     * Checks whether intl is available on the server.
3368
     *
3369
     * @psalm-pure
3370
     *
3371
     * @return bool
3372
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
3373
     *
3374
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
3375
     */
3376 5
    public static function intl_loaded(): bool
    {
        // Ask the runtime whether the "intl" extension is loaded.
        $has_intl = \extension_loaded('intl');

        return $has_intl;
    }
3380
3381
    /**
3382
     * Returns true if the string contains only alphabetic chars, false otherwise.
3383
     *
3384
     * @param string $str <p>The input string.</p>
3385
     *
3386
     * @psalm-pure
3387
     *
3388
     * @return bool
3389
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3390
     */
3391 10
    public static function is_alpha(string $str): bool
    {
        // Zero or more alphabetic characters and nothing else.
        $alpha_only_pattern = '^[[:alpha:]]*$';

        // Without mbstring, defer to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $alpha_only_pattern);
        }

        return \mb_ereg_match($alpha_only_pattern, $str);
    }
3399
3400
    /**
3401
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3402
     *
3403
     * @param string $str <p>The input string.</p>
3404
     *
3405
     * @psalm-pure
3406
     *
3407
     * @return bool
3408
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3409
     */
3410 13
    public static function is_alphanumeric(string $str): bool
    {
        // Zero or more alphanumeric characters and nothing else.
        $alnum_only_pattern = '^[[:alnum:]]*$';

        // Without mbstring, defer to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $alnum_only_pattern);
        }

        return \mb_ereg_match($alnum_only_pattern, $str);
    }
3418
3419
    /**
3420
     * Returns true if the string contains only punctuation chars, false otherwise.
3421
     *
3422
     * @param string $str <p>The input string.</p>
3423
     *
3424
     * @psalm-pure
3425
     *
3426
     * @return bool
3427
     *              <p>Whether or not $str contains only punctuation chars.</p>
3428
     */
3429 10
    public static function is_punctuation(string $str): bool
    {
        // Zero or more punctuation characters and nothing else.
        $punctuation_only_pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $punctuation_only_pattern);
    }
3433
3434
    /**
3435
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3436
     *
3437
     * @param string $str                       <p>The input string.</p>
3438
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
3439
     *
3440
     * @psalm-pure
3441
     *
3442
     * @return bool
3443
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3444
     */
3445 1
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // The string is printable when removing invisible characters changes nothing.
        $visible_only = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $visible_only === $str;
    }
3449
3450
    /**
3451
     * Checks if a string is 7 bit ASCII.
3452
     *
3453
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3454
     *
3455
     * @param string $str <p>The string to check.</p>
3456
     *
3457
     * @psalm-pure
3458
     *
3459
     * @return bool
3460
     *              <p>
3461
     *              <strong>true</strong> if it is ASCII<br>
3462
     *              <strong>false</strong> otherwise
3463
     *              </p>
3464
     */
3465 8
    public static function is_ascii(string $str): bool
    {
        // The check itself is delegated to the ASCII helper class.
        $is_ascii_only = ASCII::is_ascii($str);

        return $is_ascii_only;
    }
3469
3470
    /**
3471
     * Returns true if the string is base64 encoded, false otherwise.
3472
     *
3473
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3474
     *
3475
     * @param string|null $str                   <p>The input string.</p>
3476
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3477
     *
3478
     * @psalm-pure
3479
     *
3480
     * @return bool
3481
     *              <p>Whether or not $str is base64 encoded.</p>
3482
     */
3483 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // Anything that is not a string (e.g. null) is never base64.
        if (!\is_string($str)) {
            return false;
        }

        // The empty string only counts when the caller explicitly allows it.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // Decode in strict mode, then require the round trip to reproduce the input exactly.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        return \base64_encode($decoded) === $str;
    }
3501
3502
    /**
3503
     * Check if the input is binary... (this looks like a hack).
3504
     *
3505
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3506
     *
3507
     * @param int|string $input
3508
     * @param bool       $strict
3509
     *
3510
     * @psalm-pure
3511
     *
3512
     * @return bool
3513
     */
3514 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // A string made up solely of "0" and "1" characters counts as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        // Next, ask the class's file-type detection about the raw data.
        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        if (!$strict) {
            // Non-strict heuristic: more than a quarter of all bytes being NUL means binary.
            $byte_count = \strlen($input);
            $null_byte_count = \substr_count($input, "\x0", 0, $byte_count);

            return ($null_byte_count / $byte_count) > 0.25;
        }

        // Strict mode asks fileinfo for the encoding and therefore needs the extension.
        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $detected_encoding === 'binary';
    }
3554
3555
    /**
3556
     * Check if the file is binary.
3557
     *
3558
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3559
     *
3560
     * @param string $file
3561
     *
3562
     * @return bool
3563
     */
3564 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // The file could not be opened, so there is nothing to inspect.
            return false;
        }

        // Only the first 512 bytes are read and handed to the binary check.
        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === false || $head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
3581
3582
    /**
3583
     * Returns true if the string contains only whitespace chars, false otherwise.
3584
     *
3585
     * @param string $str <p>The input string.</p>
3586
     *
3587
     * @psalm-pure
3588
     *
3589
     * @return bool
3590
     *              <p>Whether or not $str contains only whitespace characters.</p>
3591
     */
3592 15
    public static function is_blank(string $str): bool
    {
        // Zero or more whitespace characters and nothing else.
        $whitespace_only_pattern = '^[[:space:]]*$';

        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($whitespace_only_pattern, $str)
            : self::str_matches_pattern($str, $whitespace_only_pattern);
    }
3600
3601
    /**
3602
     * Checks if the given string is equal to any "Byte Order Mark".
3603
     *
3604
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3605
     *
3606
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3607
     *
3608
     * @param string $str <p>The input string.</p>
3609
     *
3610
     * @psalm-pure
3611
     *
3612
     * @return bool
3613
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3614
     */
3615 2
    public static function is_bom($str): bool
    {
        // The known BOM byte sequences are the keys of self::$BOM (the values are not
        // needed here). The input must be identical (===) to one of those keys.
        //
        // The previous implementation iterated the static table by reference, which
        // turned every entry of the shared array into a reference and left a dangling
        // reference variable behind, although nothing is ever written. A strict
        // lookup over the keys gives the same result without touching shared state.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3626
3627
    /**
3628
     * Determine whether the string is considered to be empty.
3629
     *
3630
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3631
     * empty() does not generate a warning if the variable does not exist.
3632
     *
3633
     * @param array|float|int|string $str
3634
     *
3635
     * @psalm-pure
3636
     *
3637
     * @return bool
3638
     *              <p>Whether or not $str is empty().</p>
3639
     */
3640 1
    public static function is_empty($str): bool
    {
        // For a parameter that is always defined, empty() is the same as a negated
        // truthiness check.
        return !$str;
    }
3644
3645
    /**
3646
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3647
     *
3648
     * @param string $str <p>The input string.</p>
3649
     *
3650
     * @psalm-pure
3651
     *
3652
     * @return bool
3653
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3654
     */
3655 13
    public static function is_hexadecimal(string $str): bool
    {
        // Zero or more hexadecimal digits and nothing else.
        $hex_only_pattern = '^[[:xdigit:]]*$';

        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($hex_only_pattern, $str)
            : self::str_matches_pattern($str, $hex_only_pattern);
    }
3663
3664
    /**
3665
     * Check if the string contains any HTML tags.
3666
     *
3667
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3668
     *
3669
     * @param string $str <p>The input string.</p>
3670
     *
3671
     * @psalm-pure
3672
     *
3673
     * @return bool
3674
     *              <p>Whether or not $str contains html elements.</p>
3675
     */
3676 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // Emoji are encoded before matching (workaround for emoji support).
        $encoded = self::emoji_encode($str);

        // Opening or closing tag, optionally with attributes and a self-closing slash.
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        // One match is enough; a failed or unsuccessful match means "no HTML".
        return \preg_match($tag_pattern, $encoded) === 1;
    }
3691
3692
    /**
3693
     * Check if $url is a correct URL.
3694
     *
3695
     * @param string $url
3696
     * @param bool   $disallow_localhost
3697
     *
3698
     * @psalm-pure
3699
     *
3700
     * @return bool
3701
     */
3702 1
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection (only http:// and https:// are ever accepted)
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connects to "https://foo.localhost/phpmyadmin/..."
        if ($disallow_localhost) {
            $loopback_prefixes = [
                'http://localhost',
                'https://localhost',
                'http://127.0.0.1',
                'https://127.0.0.1',
                'http://::1',
                'https://::1',
                // Bracketed form: this is how an IPv6 host is written inside a URL.
                // Without these entries the IPv6 loopback passed the guard and could
                // then be accepted by the filter_var() fallback at the end.
                'http://[::1]',
                'https://[::1]',
            ];
            if (self::str_istarts_with_any($url, $loopback_prefixes)) {
                return false;
            }

            // Reject anything that contains ".localhost" after the scheme.
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Everything else is decided by PHP's own URL validation.
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3743
3744
    /**
3745
     * Try to check if "$str" is a JSON-string.
3746
     *
3747
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
3748
     *
3749
     * @param string $str                                    <p>The input string.</p>
3750
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
3751
     *                                                       results.</p>
3752
     *
3753
     * @return bool
3754
     *              <p>Whether or not the $str is in JSON format.</p>
3755
     */
3756 42
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only accepted when the input itself spells "null" (any letter case).
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        // Optionally reject everything that did not decode to an array or an object.
        if ($only_array_or_object_results_are_valid) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        return \json_last_error() === \JSON_ERROR_NONE;
    }
3783
3784
    /**
     * Returns true if the string contains only lowercase chars, false otherwise.
     *
3785
     * @param string $str <p>The input string.</p>
3786
     *
3787
     * @psalm-pure
3788
     *
3789
     * @return bool
3790
     *              <p>Whether or not $str contains only lowercase chars.</p>
3791
     */
3792 8
    public static function is_lowercase(string $str): bool
    {
        // POSIX bracket expression: zero or more lowercase characters and nothing else.
        $pattern = '^[[:lower:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: delegate to the class' own pattern matcher
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3800
3801
    /**
3802
     * Returns true if the string is serialized, false otherwise.
3803
     *
3804
     * @param string $str <p>The input string.</p>
3805
     *
3806
     * @psalm-pure
3807
     *
3808
     * @return bool
3809
     *              <p>Whether or not $str is serialized.</p>
3810
     */
3811 7
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // \unserialize() returns false for a serialized boolean false as well as on
        // failure, so that single valid payload has to be recognised up front.
        if ($str === 'b:0;') {
            return true;
        }

        // "allowed_classes => false" keeps this probe from instantiating arbitrary classes
        // found in the (possibly untrusted) input; objects are restored as
        // __PHP_Incomplete_Class instead, which still counts as "is serialized".
        // The "@" hides the notice/warning PHP emits for input that is not serialized.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3823
3824
    /**
3825
     * Returns true if the string contains only upper case chars, false
3826
     * otherwise.
3827
     *
3828
     * @param string $str <p>The input string.</p>
3829
     *
3830
     * @psalm-pure
3831
     *
3832
     * @return bool
3833
     *              <p>Whether or not $str contains only upper case characters.</p>
3834
     */
3835 8
    public static function is_uppercase(string $str): bool
    {
        // POSIX bracket expression: zero or more uppercase characters and nothing else.
        $pattern = '^[[:upper:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: delegate to the class' own pattern matcher
            return self::str_matches_pattern($str, $pattern);
        }

        return \mb_ereg_match($pattern, $str);
    }
3843
3844
    /**
3845
     * Check if the string is UTF-16.
3846
     *
3847
     * EXAMPLE: <code>
3848
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
3849
     * //
3850
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
3851
     * //
3852
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
3853
     * </code>
3854
     *
3855
     * @param string $str                       <p>The input string.</p>
3856
     * @param bool   $check_if_string_is_binary
3857
     *
3858
     * @psalm-pure
3859
     *
3860
     * @return false|int
3861
     *                   <strong>false</strong> if it's not UTF-16,<br>
3862
     *                   <strong>1</strong> for UTF-16LE,<br>
3863
     *                   <strong>2</strong> for UTF-16BE
3864
     */
3865 21
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check: a string that carries a BOM skips the binary test
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score per candidate byte order; both candidates run through the very same check.
        $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            // The candidate is only scored if the conversion survives a full round trip.
            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            // Computed lazily and shared between both candidates.
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys of count_chars() are needed, so iterate them by value
            // (the former by-reference loop over a function result had no target to modify).
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$hits[$candidate];
                }
            }
        }

        // A tie (including "no hits at all") means: not recognisable as UTF-16.
        if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
            return false;
        }

        // 1 = UTF-16LE, 2 = UTF-16BE
        return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
    }
3942
3943
    /**
3944
     * Check if the string is UTF-32.
3945
     *
3946
     * EXAMPLE: <code>
3947
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
3948
     * //
3949
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
3950
     * //
3951
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
3952
     * </code>
3953
     *
3954
     * @param string $str                       <p>The input string.</p>
3955
     * @param bool   $check_if_string_is_binary
3956
     *
3957
     * @psalm-pure
3958
     *
3959
     * @return false|int
3960
     *                   <strong>false</strong> if it's not UTF-32,<br>
3961
     *                   <strong>1</strong> for UTF-32LE,<br>
3962
     *                   <strong>2</strong> for UTF-32BE
3963
     */
3964 19
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // fix for the "binary"-check: a string that carries a BOM skips the binary test
        if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
            $check_if_string_is_binary = false;
        }

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score per candidate byte order; both candidates run through the very same check.
        $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            // The candidate is only scored if the conversion survives a full round trip.
            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            // Computed lazily and shared between both candidates.
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys of count_chars() are needed, so iterate them by value
            // (the former by-reference loop over a function result had no target to modify).
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$hits[$candidate];
                }
            }
        }

        // A tie (including "no hits at all") means: not recognisable as UTF-32.
        if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
            return false;
        }

        // 1 = UTF-32LE, 2 = UTF-32BE
        return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
    }
4041
4042
    /**
4043
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
4044
     *
4045
     * EXAMPLE: <code>
4046
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
4047
     * //
4048
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
4049
     * </code>
4050
     *
4051
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
4052
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
4053
     *
4054
     * @psalm-pure
4055
     *
4056
     * @return bool
4057
     */
4058 83
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // Iterate by value: the elements are only inspected, never modified, so the
            // former by-reference loop (which was also never unset) was unnecessary.
            foreach ($str as $item) {
                // Recurse so that nested arrays are checked as well.
                if (!self::is_utf8($item, $strict)) {
                    return false;
                }
            }

            // Every element passed (this is also the result for an empty array).
            return true;
        }

        // Scalars and null are cast to string and checked by the string-level helper.
        return self::is_utf8_string((string) $str, $strict);
    }
4072
4073
    /**
4074
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
4075
     * Decodes a JSON string
4076
     *
4077
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
4078
     *
4079
     * @see http://php.net/manual/en/function.json-decode.php
4080
     *
4081
     * @param string $json    <p>
4082
     *                        The <i>json</i> string being decoded.
4083
     *                        </p>
4084
     *                        <p>
4085
     *                        This function only works with UTF-8 encoded strings.
4086
     *                        </p>
4087
     *                        <p>PHP implements a superset of
4088
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
4089
     *                        only supports these values when they are nested inside an array or an object.
4090
     *                        </p>
4091
     * @param bool   $assoc   [optional] <p>
4092
     *                        When <b>TRUE</b>, returned objects will be converted into
4093
     *                        associative arrays.
4094
     *                        </p>
4095
     * @param int    $depth   [optional] <p>
4096
     *                        User specified recursion depth.
4097
     *                        </p>
4098
     * @param int    $options [optional] <p>
4099
     *                        Bitmask of JSON decode options. Currently only
4100
     *                        <b>JSON_BIGINT_AS_STRING</b>
4101
     *                        is supported (default is to cast large integers as floats)
4102
     *                        </p>
4103
     *
4104
     * @psalm-pure
4105
     *
4106
     * @return mixed
4107
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
4108
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
4109
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
4110
     *               is deeper than the recursion limit.</p>
4111
     */
4112 43
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // Run the input through the class' own filter() before handing it to the decoder.
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // Clamp the recursion depth to at least 1.
        $effective_depth = $depth < 1 ? 1 : $depth;

        return \json_decode($filtered_json, $assoc, $effective_depth, $options);
    }
4130
4131
    /**
4132
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
4133
     * Returns the JSON representation of a value.
4134
     *
4135
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
4136
     *
4137
     * @see http://php.net/manual/en/function.json-encode.php
4138
     *
4139
     * @param mixed $value   <p>
4140
     *                       The <i>value</i> being encoded. Can be any type except
4141
     *                       a resource.
4142
     *                       </p>
4143
     *                       <p>
4144
     *                       All string data must be UTF-8 encoded.
4145
     *                       </p>
4146
     *                       <p>PHP implements a superset of
4147
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
4148
     *                       only supports these values when they are nested inside an array or an object.
4149
     *                       </p>
4150
     * @param int   $options [optional] <p>
4151
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
4152
     *                       <b>JSON_HEX_TAG</b>,
4153
     *                       <b>JSON_HEX_AMP</b>,
4154
     *                       <b>JSON_HEX_APOS</b>,
4155
     *                       <b>JSON_NUMERIC_CHECK</b>,
4156
     *                       <b>JSON_PRETTY_PRINT</b>,
4157
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
4158
     *                       <b>JSON_FORCE_OBJECT</b>,
4159
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
4160
     *                       constants is described on
4161
     *                       the JSON constants page.
4162
     *                       </p>
4163
     * @param int   $depth   [optional] <p>
4164
     *                       Set the maximum depth. Must be greater than zero.
4165
     *                       </p>
4166
     *
4167
     * @psalm-pure
4168
     *
4169
     * @return false|string
4170
     *                      A JSON encoded <strong>string</strong> on success or<br>
4171
     *                      <strong>FALSE</strong> on failure
4172
     */
4173 5
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // Run the value through the class' own filter() before handing it to the encoder.
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // Clamp the maximum depth to at least 1.
        $effective_depth = $depth < 1 ? 1 : $depth;

        return \json_encode($filtered_value, $options, $effective_depth);
    }
4187
4188
    /**
4189
     * Checks whether JSON is available on the server.
4190
     *
4191
     * @psalm-pure
4192
     *
4193
     * @return bool
4194
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
4195
     *
4196
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
4197
     */
4198
    public static function json_loaded(): bool
    {
        // JSON support is detected by the presence of the decoder function.
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
4202
4203
    /**
4204
     * Makes string's first char lowercase.
4205
     *
4206
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
4207
     *
4208
     * @param string      $str                           <p>The input string</p>
4209
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
4210
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4211
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
4212
     *                                                   tr</p>
4213
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
4214
     *                                                   -> ß</p>
4215
     *
4216
     * @psalm-pure
4217
     *
4218
     * @return string the resulting string
4219
     */
4220 46
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Any other charset: normalize its name and use the class' own helpers throughout.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $remainder = (string) self::substr($str, 1, null, $encoding);
            $lowered_first = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $lowered_first . $remainder;
        }

        // UTF-8: split into "everything after the first char" and "the first char".
        $remainder = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // The native mbstring function is used only when no language-specific handling
        // and no length-preserving handling was requested.
        $use_mb_functions = ($lang === null && !$try_to_keep_the_string_length);
        if ($use_mb_functions) {
            return \mb_strtolower($first_char) . $remainder;
        }

        $lowered_first = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $lowered_first . $remainder;
    }
4265
4266
    /**
4267
     * Lowercase for all words in the string.
4268
     *
4269
     * @param string      $str                           <p>The input string.</p>
4270
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
4271
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
4272
     *                                                   not start a new word.</p>
4273
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
4274
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4275
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
4276
     *                                                   tr</p>
4277
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
4278
     *                                                   -> ß</p>
4279
     *
4280
     * @psalm-pure
4281
     *
4282
     * @return string
4283
     */
4284 4
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Only a really empty string short-circuits; "0" is falsy in PHP but is regular input.
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Iterate by value: the words are only read, so no reference (and no unset) is needed.
        foreach ($words as $word) {
            // Skip empty entries, but keep the word "0", which a plain truthiness check would drop.
            if (!$word && $word !== '0') {
                continue;
            }

            // Words listed in $exceptions are appended untouched.
            $is_exception = $use_exceptions && \in_array($word, $exceptions, true);

            if ($is_exception) {
                $words_str .= $word;
            } else {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            }
        }

        return $words_str;
    }
4319
4320
    /**
4321
     * Calculate Levenshtein distance between two strings.
4322
     *
4323
     * For better performance, in a real application with a single input string
4324
     * matched against many strings from a database, you will probably want to pre-
4325
     * encode the input only once and use \levenshtein().
4326
     *
4327
     * Source: https://github.com/KEINOS/mb_levenshtein
4328
     * @see https://www.php.net/manual/en/function.levenshtein
4329
     *
4330
     * @param  string  $str1            <p>One of the strings being evaluated for Levenshtein distance.</p>
4331
     * @param  string  $str2            <p>One of the strings being evaluated for Levenshtein distance.</p>
4332
     * @param  integer $insertionCost   [optional] <p>Defines the cost of insertion.</p>
4333
     * @param  integer $replacementCost [optional] <p>Defines the cost of replacement.</p>
4334
     * @param  integer $deletionCost    [optional] <p>Defines the cost of deletion.</p>
4335
     *
4336
     * @return int
4337
     */
4338 5
    public static function levenshtein(string $str1, string $str2, int $insertionCost = 1, int $replacementCost = 1, int $deletionCost = 1)
    {
        // One map is handed to both conversions.
        // NOTE(review): convertMbAscii() presumably rewrites its arguments by reference
        // (its return value is not used here) - confirm against the helper.
        $sharedCharMap = [];

        self::convertMbAscii($str1, $sharedCharMap);
        self::convertMbAscii($str2, $sharedCharMap);

        // The distance itself is computed by the native function.
        return \levenshtein(
            $str1,
            $str2,
            $insertionCost,
            $replacementCost,
            $deletionCost
        );
    }
4346
4347
    /**
4348
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
4349
     *
4350
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
4351
     *
4352
     * @param string      $str   <p>The string to be trimmed</p>
4353
     * @param string|null $chars <p>Optional characters to be stripped</p>
4354
     *
4355
     * @psalm-pure
4356
     *
4357
     * @return string the string with unwanted characters stripped from the left
4358
     */
4359 23
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty char list strips nothing; without this guard it would build the
        // invalid pattern "^[]+".
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars === null) {
            // default: strip leading whitespace
            $pattern = '^[\\s]+';
        } elseif ($use_mbstring) {
            // mb_ereg patterns have no delimiter, so none is passed to preg_quote()
            /** @noinspection PregQuoteUsageInspection */
            $pattern = '^[' . \preg_quote($chars) . ']+';
        } else {
            // the fallback path additionally quotes "/"
            $pattern = '^[' . \preg_quote($chars, '/') . ']+';
        }

        if ($use_mbstring) {
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
4386
4387
    /**
4388
     * Returns the UTF-8 character with the maximum code point in the given data.
4389
     *
4390
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
4391
     *
4392
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
4393
     *
4394
     * @psalm-pure
4395
     *
4396
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
4397
     */
4398 2
    public static function max($arg)
    {
        // A list of strings is handled as one concatenated string.
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $all_codepoints = self::codepoints($input);

        // No code points: there is nothing to pick from.
        if ($all_codepoints === []) {
            return null;
        }

        // Turn the highest code point back into its character.
        return self::chr((int) \max($all_codepoints));
    }
4413
4414
    /**
4415
     * Calculates and returns the maximum number of bytes taken by any
4416
     * UTF-8 encoded character in the given string.
4417
     *
4418
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
4419
     *
4420
     * @param string $str <p>The original Unicode string.</p>
4421
     *
4422
     * @psalm-pure
4423
     *
4424
     * @return int
4425
     *             <p>Max byte lengths of the given chars.</p>
4426
     */
4427 2
    public static function max_chr_width(string $str): int
    {
        $byte_lengths = self::chr_size_list($str);

        // \max() is only called on a non-empty list; an empty list yields 0.
        return $byte_lengths === [] ? 0 : (int) \max($byte_lengths);
    }
4436
4437
    /**
4438
     * Checks whether mbstring is available on the server.
4439
     *
4440
     * @psalm-pure
4441
     *
4442
     * @return bool
4443
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
4444
     *
4445
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
4446
     */
4447 28
    public static function mbstring_loaded(): bool
    {
        // Ask the engine whether the "mbstring" extension is loaded.
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4451
4452
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point, or null on failure or empty input.</p>
     */
    public static function min($arg)
    {
        // An array of strings is flattened so all of its characters are compared together.
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $code_points = self::codepoints($haystack);

        // No code points means there is nothing to compare.
        if ($code_points === []) {
            return null;
        }

        // Convert the lowest code point back into its character.
        return self::chr((int) \min($code_points));
    }
4479
4480
    /**
     * Normalize the encoding-"name" input.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * Resolution order: a few exact aliases, the per-request cache, the list of
     * known encodings, and finally a table of spelling variants that is matched
     * against the upper-cased name with all non-alphanumeric characters removed.
     * Unknown names are returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)</p>
     *
     * @template TNormalizeEncodingFallback
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
     * @phpstan-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $name = (string) $encoding;

        // Both '' and '0' are falsy here and yield the fallback.
        if (!$name) {
            return $fallback;
        }

        // Exact-match aliases that are resolved without touching the cache.
        $direct_aliases = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($direct_aliases[$name])) {
            return $direct_aliases[$name];
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($name, ['1', '0'], true)) {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$name])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$name];
        }

        // Lazy-load the list of known encoding names.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // Already a known name: remember it and return it unchanged.
        if (\in_array($name, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$name] = $name;

            return $name;
        }

        $upper_name = \strtoupper($name);
        $lookup_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $upper_name);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Use the canonical name if the spelling variant is known, otherwise the upper-cased input.
        $normalized = $equivalences[$lookup_key] ?? $upper_name;

        // The cache is keyed by the name exactly as it was passed in.
        $STATIC_NORMALIZE_ENCODING_CACHE[$name] = $normalized;

        return $normalized;
    }
4645
4646
    /**
     * Standardize line endings: every "\r\n", "\r" and "\n" is replaced by $replacer
     * (unix-like "\n" by default).
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. An array is passed to "str_replace()" as-is, i.e. its elements
     *                                  replace "\r\n", "\r" and "\n" in that order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        // Array replacers keep the historical per-index "str_replace()" behaviour.
        if (\is_array($replacer)) {
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        // Step 1: collapse every line-ending style to "\n". This is safe because
        // the replacement never contains a "\r" that a later search could hit.
        $unix = \str_replace(["\r\n", "\r"], "\n", $str);

        if ($replacer === "\n") {
            return $unix;
        }

        // Step 2: one single-search pass. "str_replace()" does not rescan inserted
        // text, so a replacer such as "\r\n" is not replaced again (the former
        // one-call variant turned "a\r\nb" into "a\r\r\n\r\nb").
        return \str_replace("\n", $replacer, $unix);
    }
4662
4663
    /**
     * Normalize some MS Word special characters.
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * This is a thin wrapper that delegates to ASCII::normalize_msword().
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4679
4680
    /**
     * Normalize the whitespace.
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * This is a thin wrapper that forwards all arguments to ASCII::normalize_whitespace().
     *
     * @param string $str                          <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
     *                                             bidirectional text chars.</p>
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }
4709
4710
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * Lookup order: per-request cache, the static "ord" data table, "IntlChar::ord()"
     * (if supported) and finally a manual decode of the first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The cache is keyed by the raw input plus the (normalized) encoding name.
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // Lazy-load the static character => code point table.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            $code = \IntlChar::ord($chr);
            // "IntlChar::ord()" yields null on failure; compare explicitly so that
            // a legitimate code point 0 is not mistaken for a failure.
            if ($code !== null) {
                return $CHAR_CACHE[$cache_key] = $code;
            }
        }

        //
        // fallback via vanilla php
        //

        /** @var int[] $bytes - "unpack": only false if the format string contains errors */
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        // "unpack()" returns a 1-based array, so the lead byte is at index 1.
        $lead_byte = $bytes ? $bytes[1] : 0;

        // 4-byte sequence
        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            return $CHAR_CACHE[$cache_key] = ((($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        }

        // 3-byte sequence
        if ($lead_byte >= 0xE0 && isset($bytes[3])) {
            return $CHAR_CACHE[$cache_key] = ((($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        }

        // 2-byte sequence
        if ($lead_byte >= 0xC0 && isset($bytes[2])) {
            return $CHAR_CACHE[$cache_key] = ((($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80);
        }

        // Single byte (or empty input, which yields 0).
        return $CHAR_CACHE[$cache_key] = $lead_byte;
    }
4795
4796
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        $input = $clean_utf8 ? self::clean($str) : $str;

        // Without mbstring fall back to the native parser.
        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($input, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($input, $result);

        // Success requires both a non-false return value and a non-empty result.
        return $parsed !== false && $result !== [];
    }
4837
4838
    /**
     * Checks if the "/u" modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty string;
     * warnings are silenced on purpose.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_count = @\preg_match('//u', '');

        // "preg_match()" yields 1 on a match, 0 on no match and false on error.
        return $match_count === 1;
    }
4854
4855
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $step is not a number or not positive
     * @throws \RuntimeException         if $use_ctype is requested but ext-ctype is missing
     *
     * @return string[]
     *                  <p>The characters of the range; an empty array if a bound is empty or resolves to 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        // Falsy bounds ('', 0, '0', null) produce no range at all.
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // The ctype functions interpret integer arguments as ASCII codes (and this
        // is deprecated since PHP 8.1), so both bounds are always cast to string.
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // Not a number: treat the bound as a character.
            $start = self::ord((string) $var1);
        }

        if (!$start) {
            return [];
        }

        // The end bound is interpreted the same way as the start bound.
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord((string) $var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4948
4949
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $decoded = self::urldecode_unicode_helper($str);

        // One decoding pass always runs; with $multi_decode the passes repeat
        // until a pass no longer changes the string.
        do {
            $before_pass = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($decoded),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $decoded);

        return self::fix_simple_utf8($decoded);
    }
5009
5010
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always compiled with the "u" (UTF-8) modifier.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // The option string "msr" is mapped to "ms".
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5045
5046
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Every known BOM is checked in turn against the (possibly already shortened) string.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, $bom_string, $bom_byte_length) !== 0) {
                continue;
            }

            /** @var false|string $remainder - needed for PhpStan (stubs error) */
            $remainder = \substr($str, $bom_byte_length);
            if ($remainder === false) {
                return '';
            }

            $str = (string) $remainder;
        }

        return $str;
    }
5081
5082
    /**
     * Collapses directly repeated occurrences of a string into a single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        $needles = \is_string($what) ? [$what] : $what;

        // Anything that is neither a string nor an array leaves the input untouched.
        if (!\is_array($needles)) {
            return $str;
        }

        foreach ($needles as $needle) {
            // One or more consecutive copies of the needle become exactly one copy.
            $pattern = '/(' . \preg_quote($needle, '/') . ')+/u';
            $str = (string) \preg_replace($pattern, $needle, $str);
        }

        return $str;
    }
5112
5113
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
     *                               should not be stripped. Default: '' (strip all tags)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5130
5131
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * EXAMPLE: <code>UTF8::remove_html_breaks("a<br />b\r\nc"); // 'abc'</code>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is an empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // - "\r\n" is listed first so a CRLF pair counts as ONE break.
        // - "<br\b[^>]*>" matches "<br>", "<br/>", "<br />" and "<br ...>" in any
        //   letter case, but not other tags that merely start with "br" (e.g. "<brand>").
        // The former pattern began with a stray "/", which removed a slash directly
        // in front of a CRLF, and used "<br.*>", which matched any "<br…>" tag.
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
5146
5147
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * This is a thin wrapper that forwards all arguments to ASCII::remove_invisible_characters().
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        $cleaned = ASCII::remove_invisible_characters(
            $str,
            $url_encoded,
            $replacement,
            $keep_basic_control_characters
        );

        return $cleaned;
    }
5184
5185
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_left('0815-foo', '0'); // '815-foo'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a plain truthiness test would treat the
        // prefix "0" as empty and never remove it.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        // Fast path for UTF-8.
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5226
5227
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * EXAMPLE: <code>UTF8::remove_right('foo-10', '0'); // 'foo-1'</code>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a plain truthiness test would treat the
        // suffix "0" as empty and never remove it.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        // Fast path for UTF-8.
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5265
5266
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-sensitive replacement uses the native function, the
        // case-insensitive variant goes through self::str_ireplace().
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5291
5292
    /**
     * Replaces every occurrence of each element of $search in $str with $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // case-sensitive -> native byte-wise replace, otherwise the class' own "str_ireplace()"
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5317
5318
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if ($replacement_char === '') {
                // "none" === drop invalid sequences instead of substituting them
                $replacement_char_helper = 'none';
            } else {
                // "mb_substitute_character()" expects a Unicode code point, so decode the
                // first *character*; "\ord()" would only look at its first byte and
                // would pick a wrong substitute for every multi-byte replacement char
                $replacement_char_helper = self::ord(
                    (string) \mb_substr($replacement_char, 0, 1, 'UTF-8'),
                    'UTF-8'
                );
            }

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
            @\mb_substitute_character($replacement_char_helper);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            // restore the previously configured substitute character
            \mb_substitute_character($save);
        }

        // finally replace the (already existing) replacement characters themselves
        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
5377
5378
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
                $pattern = "[{$chars}]+$";
            } else {
                $pattern = '[\\s]+$';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without native "mbstring": go through the class' own regex helper
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
5418
5419
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the
     * internal support table. The markup is always returned and additionally
     * echoed when $useEcho is true.
     *
     * @param bool $useEcho <p>Set to <strong>false</strong> to only return the markup without printing it.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The generated HTML.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        // init
        $html = '<pre>';

        // iterate by value: the table is only read here, a by-reference loop
        // would leave a dangling reference into the shared static array
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5445
5446
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to encode
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched when the caller asked for it
        $pass_through = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($pass_through) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5479
5480
    /**
     * Replaces every run of $tab_length consecutive spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5500
5501
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the two well-known names are used as-is, everything else gets normalized
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first char and drop leading dashes / underscores
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_*" functions are only used if no language special-casing is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-case one matched chunk, shared by both replace-callbacks below.
         *
         * @param string $chunk
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($chunk)
                : \mb_strtoupper($chunk, $encoding);
        };

        // 1.) remove separators and upper-case the char that follows them (if any)
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // 2.) upper-case the char that follows a run of digits
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5594
5595
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);

        // first pass: parts separated by a space, second pass: parts separated by a dash
        $by_space = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($by_space, '-');
    }
5616
5617
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // case-insensitive lookups always go through "mbstring"
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        // before PHP 8 there is no native "str_contains()"
        if (\PHP_VERSION_ID < 80000) {
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
5647
5648
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles list or an empty needle inside the
     * list always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare with '' explicitly, so that the needle "0" is still searched for
            if ($needle === '') {
                return false;
            }

            // run exactly one lookup per needle, matching the requested case-sensitivity
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5685
5686
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped; an empty $haystack or an empty $needles list
     * always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains $needle.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare with '' explicitly, so that the needle "0" is not skipped
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5729
5730
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // thin wrapper: "str_delimit()" with a dash as the delimiter
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5746
5747
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // with native "mbstring" the "mb_ereg_*" functions are used, otherwise PCRE
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // 1.) put a dash in front of every upper-case letter that is not at a word boundary
        $str = $has_mbstring
            ? (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed)
            : (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);

        // 2.) lower-case everything
        $use_plain_mb_lower = $lang === null
                              && !$try_to_keep_the_string_length
                              && $encoding === 'UTF-8';
        $str = $use_plain_mb_lower
            ? \mb_strtolower($str)
            : self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        // 3.) collapse dashes, underscores and whitespace into the requested delimiter
        if ($has_mbstring) {
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5798
5799
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        $has_no_bom = !self::string_has_bom($str);
        if (self::is_binary($str, $has_no_bom)) {
            // 1 === little endian, 2 === big endian
            $utf32_flag = self::is_utf32($str, false);
            if ($utf32_flag === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_flag === 2) {
                return 'UTF-32BE';
            }

            $utf16_flag = self::is_utf16($str, false);
            if ($utf16_flag === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_flag === 2) {
                return 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            // the order matters: the first matching encoding wins
            $candidate_encodings = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidate_encodings, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            // lazy-load the list of known encodings
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $iconv_encoding) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($iconv_encoding, $iconv_encoding . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $iconv_encoding;
            }
        }

        return false;
    }
5929
5930
    /**
     * Check if the string ends with the given substring.
     *
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string ...
        if ($needle === '') {
            return true;
        }

        // ... but an empty string can't end with anything else
        if ($haystack === '') {
            return false;
        }

        // before PHP 8 there is no native "str_ends_with()": compare the trailing bytes instead
        if (\PHP_VERSION_ID < 80000) {
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
5962
5963
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply never enters the loop
        foreach ($substrings as $candidate) {
            // byte-wise comparison of the trailing part of the string
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5990
5991
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // byte-wise prefix check; an empty $substring never counts as "already present"
        $already_prefixed = $substring !== '' && \strpos($str, $substring) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
6014
6015
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // byte-wise suffix check, only meaningful if both strings are non-empty
        $already_suffixed = $str !== ''
                            && $substring !== ''
                            && \substr($str, -\strlen($substring)) === $substring;

        if ($already_suffixed) {
            return $str;
        }

        return $str . $substring;
    }
6039
6040
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: drop "_id" first, then turn the remaining underscores into spaces
        $without_id = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($spaced));
    }
6066
6067
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string ...
        if ($needle === '') {
            return true;
        }

        // ... but an empty string can't end with anything else
        if ($haystack === '') {
            return false;
        }

        // cut off as many trailing bytes as the needle has and compare them case-insensitively
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6094
6095
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty list of substrings never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with one of the $substrings.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: nothing is modified, so a by-reference loop would
        // only leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6122
6123
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The string is returned unchanged when $index lies beyond its length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The character index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $char_count = (int) self::strlen($str, $encoding);
            if ($index > $char_count) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $char_count, $encoding);

            return $head . $substring . $tail;
        }

        // fast path: plain "mb_" functions for UTF-8
        $char_count = (int) \mb_strlen($str);
        if ($index > $char_count) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $char_count);

        return $head . $substring . $tail;
    }
6164
6165
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * Every search term is quoted and turned into a case-insensitive unicode
     * regex, the replacement itself is done by <function>preg_replace</function>.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     An empty search term never matches.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements.</p>
     *
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Build the regex list in a fresh array (keys preserved) instead of
        // rewriting $search through a reference that would stay alive after the loop.
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // a pattern that can never match: an empty needle replaces nothing
                $patterns[$key] = '/^(?<=.)$/';
            } else {
                $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }

        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6232
6233
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * - case-insensitive, including non-ASCII UTF-8 characters
     * - an empty $search appends $replacement to $str
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        // Compare the leading characters (not bytes) of $str with $search in
        // lower case, so that e.g. "Κόσμε" and "κόσμε" are treated as equal.
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        $prefix = (string) \mb_substr($str, 0, $search_length, 'UTF-8');

        if (\mb_strtolower($prefix, 'UTF-8') !== \mb_strtolower($search, 'UTF-8')) {
            return $str;
        }

        return $replacement . (string) \mb_substr($str, $search_length, null, 'UTF-8');
    }
6268
6269
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * - case-insensitive, including non-ASCII UTF-8 characters
     * - an empty $search appends $replacement to $str
     * - a $search that is longer than $str never matches
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = (int) \mb_strlen($str, 'UTF-8');
        $search_length = (int) \mb_strlen($search, 'UTF-8');

        // Guard first: a needle longer than the haystack cannot be its ending
        // (and would otherwise produce an out-of-range offset).
        if ($search_length > $str_length) {
            return $str;
        }

        // Compare the trailing characters (not bytes) in lower case.
        $suffix = (string) \mb_substr($str, -$search_length, null, 'UTF-8');
        if (\mb_strtolower($suffix, 'UTF-8') !== \mb_strtolower($search, 'UTF-8')) {
            return $str;
        }

        return (string) \mb_substr($str, 0, $str_length - $search_length, 'UTF-8') . $replacement;
    }
6303
6304
    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for. An empty needle always matches.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // An empty needle is a prefix of every string - even of an empty one.
        if ($needle === '') {
            return true;
        }

        // A match only counts when self::stripos() reports it at position 0.
        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
6331
6332
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $str or an empty list of substrings never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the $substrings.</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // Needed explicitly: str_istarts_with() treats an empty needle as a match.
        if ($str === '') {
            return false;
        }

        // Iterate by value: nothing is modified, so a by-reference loop would
        // only leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
6363
6364
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * - the separator is searched case-insensitive
     * - an empty string is returned if $str or $separator is empty, or if
     *   the separator does not occur
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Forward $encoding so that the offset is measured in the same
        // charset that is used to cut the string below.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6403
6404
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * - the separator is searched case-insensitive
     * - an empty string is returned if $str or $separator is empty, or if
     *   the separator does not occur
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Forward $encoding so that the offset is measured in the same
        // charset that is used to cut the string below.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6443
6444
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * - the separator is searched case-insensitive
     * - an empty string is returned if $str or $separator is empty, or if
     *   the separator does not occur
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Forward $encoding so that the offset is measured in the same
        // charset that is used to cut the string below.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6475
6476
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * - the separator is searched case-insensitive
     * - an empty string is returned if $str or $separator is empty, or if
     *   the separator does not occur
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $use_mb = $encoding === 'UTF-8';

        // position of the last (case-insensitive) occurrence of the separator
        $position = $use_mb
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        return $use_mb
            ? (string) \mb_substr($str, 0, $position)
            : (string) self::substr($str, 0, $position, $encoding);
    }
6512
6513
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Thin wrapper around self::stristr() that returns an empty string
     * instead of false and short-circuits on empty input.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6551
6552
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Thin wrapper around self::strrichr() that returns an empty string
     * instead of false and short-circuits on empty input.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6590
6591
    /**
     * Returns the last $n characters of the string.
     *
     * An empty string is returned for an empty input or if $n is not positive.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $normalized = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized);
        }

        // fast path: a negative start counts from the end of the string
        return (string) \mb_substr($str, -$n);
    }
6619
6620
    /**
     * Limit the number of characters in a string.
     *
     * A string that already fits into $length is returned unchanged. A longer
     * string is cut so that the kept part plus $str_add_on is $length
     * characters long; if $str_add_on alone is at least as long as $length,
     * only $str_add_on is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never go below zero: a negative length would make "mb_substr"
            // cut from the end and return more than $length characters.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return (string) \mb_substr($str, 0, $keep) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // measure the add-on in the same encoding as the string itself
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6659
6660
    /**
     * Limit the number of characters in a string, but also after the next word.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * A string that already fits into $length is returned unchanged, otherwise
     * the string is cut at the last space inside the first $length characters
     * and $str_add_on is appended. If there is no usable space, the first
     * ($length - 1) characters are kept.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        $use_mb = $encoding === 'UTF-8';

        $char_count = $use_mb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($char_count <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut right in front of it.
        $boundary_char = $use_mb
            ? \mb_substr($str, $length - 1, 1)
            : self::substr($str, $length - 1, 1, $encoding);
        if ($boundary_char === ' ') {
            $kept = $use_mb
                ? \mb_substr($str, 0, $length - 1)
                : self::substr($str, 0, $length - 1, $encoding);

            return ((string) $kept) . $str_add_on;
        }

        $window = $use_mb
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if (!$use_mb && $window === false) {
            return '' . $str_add_on;
        }

        // Drop the (possibly incomplete) last word: keep everything in front
        // of the last space of the window.
        $last_space = \strrpos($window, ' ');
        $whole_words = $last_space === false ? '' : \substr($window, 0, $last_space);

        if ($whole_words !== '') {
            return $whole_words . $str_add_on;
        }

        // No word boundary available: fall back to a hard cut.
        $hard_cut = $use_mb
            ? \mb_substr($window, 0, $length - 1)
            : self::substr($window, 0, $length - 1, $encoding);

        return ((string) $hard_cut) . $str_add_on;
    }
6726
6727
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the start until
     * the first difference (or the end of the shorter string) is reached.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $use_mb = $encoding === 'UTF-8';

        if ($use_mb) {
            $limit = (int) \min(
                \mb_strlen($str1),
                \mb_strlen($str2)
            );
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );
        }

        $prefix = '';
        $pos = 0;
        while ($pos < $limit) {
            $left = $use_mb
                ? \mb_substr($str1, $pos, 1)
                : self::substr($str1, $pos, 1, $encoding);
            if ($left === false) {
                break;
            }

            $right = $use_mb
                ? \mb_substr($str2, $pos, 1)
                : self::substr($str2, $pos, 1, $encoding);
            if ($left !== $right) {
                break;
            }

            $prefix .= $left;
            ++$pos;
        }

        return $prefix;
    }
6790
6791
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, taken from $str1.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        $use_mb = $encoding === 'UTF-8';

        // Split both strings into characters once, instead of calling
        // "substr" again for every cell of the comparison table.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $use_mb
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $use_mb
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        // $len / $end: length and (exclusive) end position in $str1 of the best match so far.
        $len = 0;
        $end = 0;

        // Only the previous row of the table is ever read, so two rows are enough.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $str_char = $str_chars[$i - 1];
            $current_row = [0];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;
                    // strict ">" keeps the first occurrence in case of ties
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6881
6882
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the end until
     * the first difference (or the start of the shorter string) is reached.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str2 === '' || $str1 === '') {
            return '';
        }

        $use_mb = $encoding === 'UTF-8';

        if ($use_mb) {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );
        }

        $suffix = '';
        $distance = 1;
        while ($distance <= $limit) {
            // a negative start counts from the end of the string
            $left = $use_mb
                ? \mb_substr($str1, -$distance, 1)
                : self::substr($str1, -$distance, 1, $encoding);
            if ($left === false) {
                break;
            }

            $right = $use_mb
                ? \mb_substr($str2, -$distance, 1)
                : self::substr($str2, -$distance, 1, $encoding);
            if ($left !== $right) {
                break;
            }

            $suffix = $left . $suffix;
            ++$distance;
        }

        return $suffix;
    }
6948
6949
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped into "/" delimiters and used with the "u" modifier.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // "preg_match" yields 1 on a match, 0 on no match and false on error
        return \preg_match($regex, $str) === 1;
    }
6964
6965
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Non-negative offsets are zero-based and must be smaller than the
        // length; negative offsets may reach back at most "length" characters.
        return $offset >= 0
            ? $char_count > $offset
            : $char_count >= \abs($offset);
    }
6990
6991
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Reuse the bounds check of str_offset_exists() so that the length is
        // measured in the same $encoding that char_at() uses below.
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7023
7024
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string
     * is empty, or when the input already has more than $pad_length characters.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right', 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to do without a target length or without padding material
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // the literal 'UTF-8' takes the direct mb_* route, everything else goes through the class helpers
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // character count of a string in the selected encoding
        $measure = static function (string $value) use ($is_utf8, $encoding): int {
            return $is_utf8
                ? (int) \mb_strlen($value)
                : (int) self::strlen($value, $encoding);
        };

        // first $count characters of a string in the selected encoding
        $take = static function (string $value, int $count) use ($is_utf8, $encoding): string {
            return $is_utf8
                ? (string) \mb_substr($value, 0, $count)
                : (string) self::substr($value, 0, $count, $encoding);
        };

        $str_length = $measure($str);
        if ($pad_length < $str_length) {
            return $str;
        }

        $missing = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_BOTH) {
            // an odd remainder puts the extra character on the right side
            $left_size = (int) \floor($missing / 2);
            $right_size = (int) \ceil($missing / 2);

            $before = $take(\str_repeat($pad_string, $left_size), $left_size);
            $after = $take(\str_repeat($pad_string, $right_size), $right_size);

            return $before . $str . $after;
        }

        // repeat the pad string often enough to cover the gap, then cut it to size
        $unit_length = $measure($pad_string);
        $fill = $take(
            \str_repeat($pad_string, (int) \ceil($missing / $unit_length)),
            $missing
        );

        // every integer other than STR_PAD_LEFT / STR_PAD_BOTH pads on the right
        return $pad_type === \STR_PAD_LEFT
            ? $fill . $str
            : $str . $fill;
    }
7191
7192
    /**
     * Pads both sides of the string up to the given total length.
     * Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7220
7221
    /**
     * Pads the beginning of the string up to the given total length.
     * Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7249
7250
    /**
     * Pads the end of the string up to the given total length.
     * Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7278
7279
    /**
     * Repeat a string.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7308
7309
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $result;
         */
        $result = \str_replace($search, $replace, $subject, $count);

        return $result;
    }
7368
7369
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The prefix comparison is done byte-wise. If $search is empty, the
     * $replacement is appended to $str; if $str does not start with $search,
     * $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // an empty needle never "matches": the replacement is simply appended
        // (this also covers every empty-$str combination with an empty needle)
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_bytes = \strlen($search);

        // no matching prefix (always the case for an empty $str) -> nothing to replace
        if (\strncmp($str, $search, $prefix_bytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $prefix_bytes);
    }
7407
7408
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The suffix comparison is done byte-wise. If $search is empty, the
     * $replacement is appended to $str; if $str does not end with $search
     * (which includes every $search longer than $str), $str is returned
     * unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // an empty needle never "matches": the replacement is simply appended
        if ($search === '') {
            return $str . $replacement;
        }

        $str_bytes = \strlen($str);
        $search_bytes = \strlen($search);

        // A needle longer than the haystack cannot be a suffix. Checking this first avoids
        // handing an out-of-range negative offset to a string function (ValueError on PHP 8).
        if ($search_bytes > $str_bytes) {
            return $str;
        }

        if (\substr($str, -$search_bytes) !== $search) {
            return $str;
        }

        return \substr($str, 0, $str_bytes - $search_bytes) . $replacement;
    }
7445
7446
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strpos($subject, $search);

        if ($position === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, $search_length);
    }
7480
7481
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strrpos($subject, $search);

        if ($position === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, $search_length);
    }
7514
7515
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // the literal 'UTF-8' takes the direct mb_* route, everything else goes through the class helpers
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $char_count = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // shuffle the character positions, then rebuild the string in that order
        $positions = \range(0, $char_count - 1);
        \shuffle($positions);

        $result = '';
        foreach ($positions as $position) {
            $char = $is_utf8
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
7562
7563
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $start or $end is negative, it is computed from
     * the end of the string.
     *
     * An empty string is returned whenever the resolved $end does not lie
     * behind the resolved $start.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // the literal 'UTF-8' takes the direct mb_* route, everything else goes through the class helpers
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the string length is only needed for an open end or for offsets counted from the end
        $str_length = 0;
        if ($end === null || $end < 0 || $start < 0) {
            $str_length = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);
        }

        if ($end === null) {
            // "up to the end": the full length is always enough, the substr call caps it
            $length = $str_length;
        } else {
            // Resolve a negative start into an absolute index first, otherwise
            // "$end - $start" would grow instead of shrink.
            if ($start < 0) {
                $start = \max(0, $str_length + $start);
            }

            if ($end >= 0 && $end <= $start) {
                return '';
            }

            $length = $end < 0
                ? $str_length + $end - $start
                : $end - $start;

            // The end lies before the start. A negative length must not reach the substr call,
            // because it would be read as "omit that many characters from the end".
            if ($length < 0) {
                return '';
            }
        }

        return $is_utf8
            ? \mb_substr($str, $start, $length)
            : self::substr($str, $start, $length, $encoding);
    }
7614
7615
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and runs of whitespace become "_", every uppercase letter is
     * lowercased and prefixed with "_", every ASCII digit is surrounded by
     * "_", and finally repeated / leading / trailing "_" are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // numbers and uppercase letters only: a "|" inside a character class
            // would be a literal pipe, not an alternation
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // only an ASCII digit survives the int round-trip unchanged
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7684
7685
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        /** @var int[] $code_points */
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice collapses duplicate code points into a single entry
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7716
7717
    /**
     * Convert every string of an array to an array of Unicode characters.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * Each entry is split via "UTF8::str_split()"; the keys of $input are kept.
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        /** @var string[][] $chunks */
        $chunks = [];

        foreach ($input as $key => $value) {
            $chunks[$key] = self::str_split($value, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        return $chunks;
    }
7755
7756
    /**
7757
     * Convert a string to an array of unicode characters.
7758
     *
7759
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
7760
     *
7761
     * @param int|string $input                   <p>The string or int to split into array.</p>
7762
     * @param int        $length                  [optional] <p>Max character length of each array
7763
     *                                            element.</p>
7764
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
7765
     *                                            string.</p>
7766
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
7767
     *                                            "mb_substr"</p>
7768
     *
7769
     * @psalm-pure
7770
     *
7771
     * @return string[]
7772
     *                  <p>An array containing chunks of chars from the input.</p>
7773
     */
7774 90
    public static function str_split(
7775
        $input,
7776
        int $length = 1,
7777
        bool $clean_utf8 = false,
7778
        bool $try_to_use_mb_functions = true
7779
    ): array {
7780 90
        if ($length <= 0) {
7781 3
            return [];
7782
        }
7783
7784
        // this is only an old fallback
7785
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
7786
        /** @var int|int[]|string|string[] $input */
7787 89
        $input = $input;
7788 89
        if (\is_array($input)) {
7789
            /** @psalm-suppress InvalidReturnStatement */
7790
            /** @phpstan-ignore-next-line - old code :/ */
7791
            return self::str_split_array(
0 ignored issues
show
Bug Best Practice introduced by
The expression return self::str_split_a...ry_to_use_mb_functions) returns the type array<mixed,string[]> which is incompatible with the documented return type string[].
Loading history...
7792
                $input,
7793
                $length,
7794
                $clean_utf8,
7795
                $try_to_use_mb_functions
7796
            );
7797
        }
7798
7799
        // init
7800 89
        $input = (string) $input;
7801
7802 89
        if ($input === '') {
7803 14
            return [];
7804
        }
7805
7806 86
        if ($clean_utf8) {
7807 19
            $input = self::clean($input);
7808
        }
7809
7810
        if (
7811 86
            $try_to_use_mb_functions
7812
            &&
7813 86
            self::$SUPPORT['mbstring'] === true
7814
        ) {
7815 82
            if (\function_exists('mb_str_split')) {
7816
                /**
7817
                 * @psalm-suppress ImpureFunctionCall - why?
7818
                 */
7819 82
                $return = \mb_str_split($input, $length);
7820 82
                if ($return !== false) {
7821 82
                    return $return;
7822
                }
7823
            }
7824
7825
            $i_max = \mb_strlen($input);
7826
            if ($i_max <= 127) {
7827
                $ret = [];
7828
                for ($i = 0; $i < $i_max; ++$i) {
7829
                    $ret[] = \mb_substr($input, $i, 1);
7830
                }
7831
            } else {
7832
                $return_array = [];
7833
                \preg_match_all('/./us', $input, $return_array);
7834
                $ret = $return_array[0] ?? [];
7835
            }
7836 23
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
7837 17
            $return_array = [];
7838 17
            \preg_match_all('/./us', $input, $return_array);
7839 17
            $ret = $return_array[0] ?? [];
7840
        } else {
7841
7842
            // fallback
7843
7844 8
            $ret = [];
7845 8
            $len = \strlen($input);
7846
7847 8
            for ($i = 0; $i < $len; ++$i) {
7848 8
                if (($input[$i] & "\x80") === "\x00") {
7849 8
                    $ret[] = $input[$i];
7850
                } elseif (
7851 8
                    isset($input[$i + 1])
7852
                    &&
7853 8
                    ($input[$i] & "\xE0") === "\xC0"
7854
                ) {
7855 4
                    if (($input[$i + 1] & "\xC0") === "\x80") {
7856 4
                        $ret[] = $input[$i] . $input[$i + 1];
7857
7858 4
                        ++$i;
7859
                    }
7860
                } elseif (
7861 6
                    isset($input[$i + 2])
7862
                    &&
7863 6
                    ($input[$i] & "\xF0") === "\xE0"
7864
                ) {
7865
                    if (
7866 6
                        ($input[$i + 1] & "\xC0") === "\x80"
7867
                        &&
7868 6
                        ($input[$i + 2] & "\xC0") === "\x80"
7869
                    ) {
7870 6
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2];
7871
7872 6
                        $i += 2;
7873
                    }
7874
                } elseif (
7875
                    isset($input[$i + 3])
7876
                    &&
7877
                    ($input[$i] & "\xF8") === "\xF0"
7878
                ) {
7879
                    if (
7880
                        ($input[$i + 1] & "\xC0") === "\x80"
7881
                        &&
7882
                        ($input[$i + 2] & "\xC0") === "\x80"
7883
                        &&
7884
                        ($input[$i + 3] & "\xC0") === "\x80"
7885
                    ) {
7886
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];
7887
7888
                        $i += 3;
7889
                    }
7890
                }
7891
            }
7892
        }
7893
7894 23
        if ($length > 1) {
7895 2
            return \array_map(
7896
                static function (array $item): string {
7897 2
                    return \implode('', $item);
7898 2
                },
7899 2
                \array_chunk($ret, $length)
7900
            );
7901
        }
7902
7903 23
        if (isset($ret[0]) && $ret[0] === '') {
7904
            return [];
7905
        }
7906
7907 23
        return $ret;
7908
    }
7909
7910
    /**
     * Splits a string at every match of the given pattern and returns the
     * pieces. A positive $limit keeps only the first $limit pieces (the rest
     * is dropped, not appended to the last piece).
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        // a limit of zero asks for nothing at all
        if ($limit === 0) {
            return [];
        }

        // nothing to split on: the whole input is the only piece
        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $pieces = \mb_split($pattern, $str);
            if ($pieces === false) {
                return [];
            }

            // any negative limit means "no limit"; a positive one truncates the result
            return $limit > 0 ? \array_slice($pieces, 0, $limit) : $pieces;
        }

        // PCRE fallback: ask for one extra piece so that the unsplit remainder
        // ends up in its own element, which is thrown away again below
        $pcre_limit = $limit > 0 ? $limit + 1 : -1;

        // NOTE(review): the pattern is preg_quote()d here, so this fallback splits on the
        // literal text while the mbstring branch above treats it as a regex - confirm intent.
        $pieces = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $pcre_limit);
        if ($pieces === false) {
            return [];
        }

        if ($pcre_limit > 0 && \count($pieces) === $pcre_limit) {
            \array_pop($pieces);
        }

        return $pieces;
    }
7979
7980
    /**
     * Check if the string starts with the given substring (case-sensitive, byte-wise).
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Always true for an empty needle, otherwise false for an empty haystack.</p>
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // an empty needle is a prefix of everything; apart from that,
        // an empty haystack has no prefix at all
        if ($needle === '' || $haystack === '') {
            return $needle === '';
        }

        if (\PHP_VERSION_ID < 80000) {
            // pre-PHP8: compare only the first strlen($needle) bytes
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
8012
8013
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring. Always false for an empty
     *              input string or an empty list.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so a by-reference loop would only
        // leave a dangling reference behind
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
8044
8045
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Empty string if $str or $separator is empty, or if the separator is not found.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: the native mb_* functions with their default encoding
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // cut with the class' own substr() so the whole non-UTF-8 branch goes through
        // the same wrappers as the offset lookup above, instead of raw mb_substr()
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8086
8087
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Empty string if $str or $separator is empty, or if the separator is not found.</p>
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        // UTF-8 uses the native mb_* functions directly, everything else the class wrappers
        $is_utf8 = $encoding === 'UTF-8';

        $position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, $position + (int) \mb_strlen($separator));
        }

        return (string) self::substr(
            $str,
            $position + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8131
8132
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Empty string if $str or $separator is empty, or if the separator is not found.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // other encodings go through the class' own wrappers
            $offset = self::strpos($str, $separator, 0, $encoding);

            return $offset === false ? '' : (string) self::substr($str, 0, $offset, $encoding);
        }

        // UTF-8: native mb_* functions with their default encoding
        $offset = \mb_strpos($str, $separator);

        return $offset === false ? '' : (string) \mb_substr($str, 0, $offset);
    }
8177
8178
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Empty string if $str or $separator is empty, or if the separator is not found.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // UTF-8: native mb_* functions with their default encoding
            $offset = \mb_strrpos($str, $separator);

            return $offset === false ? '' : (string) \mb_substr($str, 0, $offset);
        }

        $offset = self::strrpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // the encoding name is normalized only for the final cut, after the offset lookup
        return (string) self::substr(
            $str,
            0,
            $offset,
            self::normalize_encoding($encoding, 'UTF-8')
        );
    }
8222
8223
    /**
     * Gets the part of the string starting at the first occurrence of "$needle"
     * (or the part before it, via "$before_needle").
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Empty string if $str or $needle is empty, or if the needle is not found.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        // UTF-8 uses the native function; passing the flag through is the same
        // as omitting it whenever it is false (the native default)
        $part = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($part === false) {
            return '';
        }

        return $part;
    }
8269
8270
    /**
     * Gets the part of the string starting at the last occurrence of "$needle"
     * (or the part before it, via "$before_needle").
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Empty string if $str or $needle is empty, or if the needle is not found.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $part = self::strrchr($str, $needle, $before_needle, $encoding);
        } else {
            // passing the flag through is the same as omitting it
            // whenever it is false (the native default)
            $part = \mb_strrchr($str, $needle, $before_needle);
        }

        if ($part === false) {
            return '';
        }

        return $part;
    }
8316
8317
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
8332
8333
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                           whitespace separator === words.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain mb_* functions are only used when no language specific
        // handling and no length preservation was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // compare explicitly against null / '': a plain truthiness check would
        // silently throw away the valid separator list "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        // a "word" is every run of chars that is neither whitespace nor one of the extra separator chars
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are passed through untouched
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8428
8429
    /**
     * Convert a string into an obfuscated string by replacing a random selection
     * of its characters with $obfuscateChar.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('[email protected]', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the characters to select, e.g. 0.5 for half of them.
     *                                Values above the number of available characters are capped.</p>
     * @param string   $obfuscateChar <p>The replacement character.</p>
     * @param string[] $keepChars     <p>Characters that are never replaced; a selected one still
     *                                counts towards the share.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The obfuscated string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // temporarily swap pre-existing obfuscate chars for a placeholder, so
        // they are handled like every other char and restored at the end
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);
        $charsMaxChange = \round($charsMax * $percent);
        $charsCounter = 0;
        $charKeyDone = [];

        // Never ask for more selections than there are selectable characters:
        // otherwise (e.g. $percent > 1) every key would end up "done" while the
        // counter is still below the target, and the loop below would never end.
        $charsSelectable = 0;
        foreach ($chars as $char) {
            if ($char !== $obfuscateChar) {
                ++$charsSelectable;
            }
        }
        if ($charsMaxChange > $charsSelectable) {
            $charsMaxChange = $charsSelectable;
        }

        // keep sweeping over the chars until enough of them were selected
        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // coin flip, so the selected positions differ from call to call
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // kept chars are selected (and counted) but not replaced
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
8495
8496
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize. They are appended as-is to the
     *                         list of "small word" regex alternatives.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex alternatives for the built-in small words, followed by the caller's extra words
        $small_words_rx = \implode(
            '|',
            \array_merge(
                [
                    '(?<!q&)a',
                    'an',
                    'and',
                    'as',
                    'at(?!&t)',
                    'but',
                    'by',
                    'en',
                    'for',
                    'if',
                    'in',
                    'of',
                    'on',
                    'or',
                    'the',
                    'to',
                    'v[.]?',
                    'via',
                    'vs[.]?',
                ],
                $ignore
            )
        );
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Upper-cases the first char of capture group 1.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_one = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        /**
         * Keeps capture group 1 and upper-cases the first char of capture group 2.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_two = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        $str = \trim($str);

        // input without any lower-case char is lower-cased first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $word = static::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $word = $matches[5];
                }

                // leading and trailing underscores are preserved around the word
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $ucfirst_group_one,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $ucfirst_group_one,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind in this pattern contains "…" (U+2026) and not "-",
        // although its inline comment speaks of a hyphen - confirm that this is intended.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        return $str;
    }
8686
8687
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one big binary number, i.e. leading
     * zero bits are dropped and an empty (or all-zero) input yields '0'.
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The binary digits. "false" stays in the documented type for backward
     *                      compatibility only; this implementation never returns it.</p>
     */
    public static function str_to_binary(string $str)
    {
        // Convert byte by byte: base_convert() on the whole hex dump silently
        // loses precision once the value no longer fits into a native integer.
        $bits = '';
        $byte_count = \strlen($str);
        for ($i = 0; $i < $byte_count; ++$i) {
            $bits .= \str_pad(\decbin(\ord($str[$i])), 8, '0', \STR_PAD_LEFT);
        }

        // same format as before: a plain number without leading zeros
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8710
8711
    /**
     * Splits a string into its lines.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>For an empty input (or a failed split): [] if empty values are removed,
     *                  otherwise [''].</p>
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "there is nothing to split"
        $nothing = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing;
        }

        // NOTE(review): "{1,2}" takes any one or two CR/LF chars as a single break, so "\n\n"
        // (a blank line) yields no empty entry - confirm that this is intended.
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $nothing;
        }

        // filtering is only needed when at least one of the two filters is active
        if ($remove_empty_values || $remove_short_values !== null) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }
8750
8751
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result alternates
     * between "non-word" parts (even indexes) and "word" parts (odd indexes).
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // NOTE(review): rxClass() presumably builds a regex character class from
        // the extra chars plus "\pL" (any letter) - confirm against its definition.
        $char_list = self::rxClass($char_list, '\pL');

        // a word: one or more word chars, optionally joined by a dash or an apostrophe
        $return = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($return === false) {
            return $remove_empty_values ? [] : [''];
        }

        if (
            $remove_short_values === null
            &&
            !$remove_empty_values
        ) {
            return $return;
        }

        $tmp_return = self::reduce_string_array(
            $return,
            $remove_empty_values,
            $remove_short_values
        );

        // Normalize every entry to a string without iterating by reference
        // (a by-reference loop variable would stay bound to the last element).
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $tmp_return
        );
    }
8802
8803
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring itself is longer than the desired length, it cannot fit
     * and is therefore not appended: the string is simply cut to $length.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $substring_length = (int) \mb_strlen($substring);

                // Only reserve room for the substring if it fits. Otherwise the
                // length would become negative and "mb_substr()" would cut from
                // the END of the string, returning more than $length characters.
                // (A negative $length given by the caller keeps its old meaning.)
                if ($length < 0 || $substring_length <= $length) {
                    $length -= $substring_length;
                } else {
                    $substring = '';
                }
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $substring_length = (int) self::strlen($substring, $encoding);

            // same "does it fit?" rule as in the UTF-8 fast path above
            if ($length < 0 || $substring_length <= $length) {
                $length -= $substring_length;
            } else {
                $substring = '';
            }
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
8862
8863
    /**
     * Truncates the string to a given length without cutting a word in half.
     * When truncation happens and $substring is given, the string is shortened
     * further so that the substring can be appended within the desired length.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default: ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // nothing to truncate
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // reserve room for the substring
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $cut = self::substr($str, 0, $length, $encoding);
            if ($cut === false) {
                return '';
            }

            // a space exactly at the cut position means no word was split
            $next_space = self::strpos($str, ' ', $length - 1, $encoding);
            if ($next_space === $length) {
                return $cut . $substring;
            }

            // otherwise fall back to the last space inside the cut part
            $last_space = self::strrpos($cut, ' ', 0, $encoding);
            $single_word_must_go = $next_space !== false && !$ignore_do_not_split_words_for_one_word;
            if ($last_space !== false || $single_word_must_go) {
                $cut = (string) self::substr($cut, 0, (int) $last_space, $encoding);
            }

            return $cut . $substring;
        }

        // UTF-8: use the "mb_*" functions directly

        // nothing to truncate
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // reserve room for the substring
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $cut - needed for PhpStan (stubs error) */
        $cut = \mb_substr($str, 0, $length);
        if ($cut === false) {
            return '';
        }

        // a space exactly at the cut position means no word was split
        $next_space = \mb_strpos($str, ' ', $length - 1);
        if ($next_space === $length) {
            return $cut . $substring;
        }

        // otherwise fall back to the last space inside the cut part
        $last_space = \mb_strrpos($cut, ' ', 0);
        $single_word_must_go = $next_space !== false && !$ignore_do_not_split_words_for_one_word;
        if ($last_space !== false || $single_word_must_go) {
            $cut = (string) \mb_substr($cut, 0, (int) $last_space);
        }

        return $cut . $substring;
    }
8969
8970
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (except for the
     * first character of the string) and replace spaces as well as dashes.
     *
     * INFO: thin wrapper around UTF8::str_delimit() with "_" as delimiter
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8987
8988
    /**
     * Returns an UpperCamelCase version of the supplied string: the string is
     * camelized first and then its first character is upper-cased. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: $clean_utf8, $lang and $try_to_keep_the_string_length are only
     *       forwarded to the final UTF8::ucfirst() call, not to UTF8::str_camelize().
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9015
9016
    /**
     * Count the words of a string, or return the words themselves.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string, or the words (for format 1 and 2).</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the parts alternate: [separator, word, separator, word, ..., separator]
        $parts = self::str_to_words($str, $char_list);
        $total = \count($parts);

        // every format other than 1 and 2 only counts the words
        if ($format !== 1 && $format !== 2) {
            return (int) (($total - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($pos = 1; $pos < $total; $pos += 2) {
                $words[] = $parts[$pos];
            }

            return $words;
        }

        // format 2: key each word by its character offset in the input
        $offset = (int) self::strlen($parts[0]);
        $pos = 1;
        while ($pos < $total) {
            $words[$offset] = $parts[$pos];
            // advance past the word and the separator that follows it
            $offset += (int) self::strlen($parts[$pos]) + (int) self::strlen($parts[$pos + 1]);
            $pos += 2;
        }

        return $words;
    }
9073
9074
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(): both strings are
     *       case-folded via UTF8::strtocasefold() and then compared.
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_first, $folded_second);
    }
9116
9117
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are normalized to NFD before the byte-wise comparison, so
     * canonically equivalent strings (e.g. precomposed vs. decomposed "é")
     * compare as equal. A string that cannot be normalized (e.g. invalid
     * UTF-8) is compared as-is.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized_str1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        /** @phpstan-ignore-next-line - we use only NFD */
        $normalized_str2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" returns false on error (e.g. invalid UTF-8):
        // fall back to the raw input instead of passing false to "\strcmp()",
        // which would either throw a TypeError or treat both strings as ''.
        return \strcmp(
            $normalized_str1 === false ? $str1 : $normalized_str1,
            $normalized_str2 === false ? $str2 : $normalized_str2
        );
    }
9145
9146
    /**
     * Find the length of the initial segment that does not contain any of the
     * characters from $char_list.
     *
     * @param string   $str
     * @param string   $char_list <p>The chars that end the initial segment.</p>
     * @param int      $offset    <p>Start position of the part of $str that is examined.</p>
     * @param int|null $length    <p>Length of the part of $str that is examined, or null for "until the end".</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask the whole string is the "initial segment"
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // restrict the string to the requested window first
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $slice = self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $slice = \mb_substr($str, $offset, $length);
            } else {
                $slice = \mb_substr($str, $offset);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the mask
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            // no char from the mask at all
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
9208
9209
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * INFO: Every value is cast with "(int)", so numeric strings such as '246'
     *       work, while prefixed notations like '0x41' or 'U+0041' are not
     *       parsed as hexadecimal.
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        if (!\is_array($intOrHex)) {
            $intOrHex = [$intOrHex];
        }

        // The code points are encoded by hand: decoding "&#N;" via the
        // "HTML-ENTITIES" mbstring encoding is deprecated as of PHP 8.2, and
        // html_entity_decode() does not return characters for many values < 160.
        $str = '';
        foreach ($intOrHex as $strPart) {
            $code_point = (int) $strPart;

            if (
                $code_point < 0
                ||
                $code_point > 0x10FFFF
                ||
                ($code_point >= 0xD800 && $code_point <= 0xDFFF)
            ) {
                // not a Unicode scalar value (out of range or surrogate):
                // keep it as numeric entity text instead of emitting invalid UTF-8
                $str .= '&#' . $code_point . ';';
            } elseif ($code_point < 0x80) {
                // 1 byte: 0xxxxxxx
                $str .= \chr($code_point);
            } elseif ($code_point < 0x800) {
                // 2 bytes: 110xxxxx 10xxxxxx
                $str .= \chr(0xC0 | ($code_point >> 6))
                        . \chr(0x80 | ($code_point & 0x3F));
            } elseif ($code_point < 0x10000) {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                $str .= \chr(0xE0 | ($code_point >> 12))
                        . \chr(0x80 | (($code_point >> 6) & 0x3F))
                        . \chr(0x80 | ($code_point & 0x3F));
            } else {
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                $str .= \chr(0xF0 | ($code_point >> 18))
                        . \chr(0x80 | (($code_point >> 12) & 0x3F))
                        . \chr(0x80 | (($code_point >> 6) & 0x3F))
                        . \chr(0x80 | ($code_point & 0x3F));
            }
        }

        return $str;
    }
9244
9245
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function string_has_bom(string $str): bool
    {
        // self::$BOM is used here as "BOM byte sequence => byte length".
        // It is iterated by value: this method only reads the list, and a
        // by-reference loop would needlessly bind into the shared static array.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, (string) $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9270
9271
    /**
     * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>", null, true); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    Tags which should not be stripped, or null to strip all tags.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // the cleanup (if requested) happens before the tags are removed
        $input = $clean_utf8 ? self::clean($str) : $str;

        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9315
9316
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * If the input is not valid UTF-8, only ASCII whitespace is stripped
     * (instead of losing the whole string).
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

        if ($stripped === null) {
            // With the "u" modifier "preg_replace()" returns null for invalid
            // UTF-8; casting that to string would silently return ''.
            // Fall back to a byte-wise removal of the ASCII whitespace chars.
            $stripped = \preg_replace('/[ \t\n\r\x0B\f]+/', '', $str);
        }

        return $stripped === null ? $str : $stripped;
    }
9337
9338
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * The lookup uses, in this order: mbstring, intl ("grapheme_stripos()", only
     * for UTF-8 and a non-negative offset), the native "stripos()" for pure
     * ASCII input and finally a case-folding fallback via UTF8::strpos().
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty haystack: only PHP >= 8 finds an empty needle (at position 0)
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        // before PHP 8 an empty needle is never found
        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // intl: "grapheme_stripos()" can't handle other encodings
        // or a negative offset
        //

        if (
            $encoding === 'UTF-8'
            &&
            $offset >= 0
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitive
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9427
9428
    /**
     * Returns all of haystack starting from and including the first
     * (case-insensitive) occurrence of needle to the end.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty haystack: only PHP >= 8 matches an empty needle
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // empty needle: PHP >= 8 returns the whole haystack, older versions fail
        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stristr($haystack, $needle, $before_needle, $encoding);
            }

            return \mb_stristr($haystack, $needle, $before_needle);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // intl: "grapheme_stristr()" can't handle other encodings
        //

        if (
            $encoding === 'UTF-8'
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via regex: lazily capture everything in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $prefix = $found[1];

        return $before_needle
            ? $prefix
            : self::substr($haystack, (int) self::strlen($prefix, $encoding), null, $encoding);
    }
9533
9534
    /**
9535
     * Get the string length, not the byte-length!
9536
     *
9537
     * INFO: use UTF8::strwidth() for the char-length
9538
     *
9539
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
9540
     *
9541
     * @see http://php.net/manual/en/function.mb-strlen.php
9542
     *
9543
     * @param string $str        <p>The string being checked for length.</p>
9544
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9545
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9546
     *
9547
     * @psalm-pure
9548
     *
9549
     * @return false|int
9550
     *                   <p>
9551
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
9552
     *                   $encoding.
9553
     *                   (One multi-byte character counted as +1).
9554
     *                   <br>
9555
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
9556
     *                   chars.
9557
     *                   </p>
9558
     */
9559 174
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // nothing to count
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are already in their canonical spelling
        $is_canonical_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_canonical_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // invalid characters make "mb_strlen" and "iconv_strlen" report a wrong length,
            // so strip them before counting
            $str = self::clean($str);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // 2) binary || ascii only: one byte is one character
        //

        $is_single_byte_encoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte_encoding) {
            return \strlen($str);
        }

        // neither mbstring nor iconv is available to deal with a non UTF-8 charset
        if (
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $length = \iconv_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        //
        // 4) intl ("grapheme_strlen()" can't handle other encodings than UTF-8)
        //

        if (self::$SUPPORT['intl'] === true && $encoding === 'UTF-8') {
            $length = \grapheme_strlen($str);
            if ($length !== null) {
                return $length;
            }
        }

        //
        // 5) ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // 6) vanilla php: count every match of "any single char" in unicode mode
        //

        \preg_match_all('/./us', $str, $chars);
        $length = \count($chars[0]);

        // no match at all for a non-empty string is reported as a failure
        return $length === 0 ? false : $length;
    }
9664
9665
    /**
9666
     * Get string length in byte.
9667
     *
9668
     * @param string $str
9669
     *
9670
     * @psalm-pure
9671
     *
9672
     * @return int
9673
     */
9674 1
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // with "mbstring.func_overload" active, "mb_" is available, so count
        // via an 8-bit charset; otherwise the native byte counter is used
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
9687
9688
    /**
9689
     * Case-insensitive string comparisons using a "natural order" algorithm.
9690
     *
9691
     * INFO: natural order version of UTF8::strcasecmp()
9692
     *
9693
     * EXAMPLES: <code>
9694
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
9695
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9696
     *
9697
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9698
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9699
     * </code>
9700
     *
9701
     * @param string $str1     <p>The first string.</p>
9702
     * @param string $str2     <p>The second string.</p>
9703
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9704
     *
9705
     * @psalm-pure
9706
     *
9707
     * @return int
9708
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9709
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9710
     *             <strong>0</strong> if they are equal
9711
     */
9712 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // fold the case of both operands first, then delegate to the natural-order comparison
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_first, $folded_second);
    }
9719
9720
    /**
9721
     * String comparisons using a "natural order" algorithm
9722
     *
9723
     * INFO: natural order version of UTF8::strcmp()
9724
     *
9725
     * EXAMPLES: <code>
9726
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
9727
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
9728
     *
9729
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
9730
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
9731
     * </code>
9732
     *
9733
     * @see http://php.net/manual/en/function.strnatcmp.php
9734
     *
9735
     * @param string $str1 <p>The first string.</p>
9736
     * @param string $str2 <p>The second string.</p>
9737
     *
9738
     * @psalm-pure
9739
     *
9740
     * @return int
9741
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9742
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
9743
     *             <strong>0</strong> if they are equal
9744
     */
9745 4
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            // both operands are folded before the native natural-order comparison
            $folded_first = (string) self::strtonatfold($str1);
            $folded_second = (string) self::strtonatfold($str2);

            return \strnatcmp($folded_first, $folded_second);
        }

        // identical strings: no folding needed
        return 0;
    }
9756
9757
    /**
9758
     * Case-insensitive string comparison of the first n characters.
9759
     *
9760
     * EXAMPLE: <code>
9761
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
9762
     * </code>
9763
     *
9764
     * @see http://php.net/manual/en/function.strncasecmp.php
9765
     *
9766
     * @param string $str1     <p>The first string.</p>
9767
     * @param string $str2     <p>The second string.</p>
9768
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
9769
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9770
     *
9771
     * @psalm-pure
9772
     *
9773
     * @return int
9774
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9775
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9776
     *             <strong>0</strong> if they are equal
9777
     */
9778 2
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // case-fold both operands, then compare their first $len characters
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded_first, $folded_second, $len);
    }
9790
9791
    /**
9792
     * String comparison of the first n characters.
9793
     *
9794
     * EXAMPLE: <code>
9795
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
9796
     * </code>
9797
     *
9798
     * @see http://php.net/manual/en/function.strncmp.php
9799
     *
9800
     * @param string $str1     <p>The first string.</p>
9801
     * @param string $str2     <p>The second string.</p>
9802
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
9803
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9804
     *
9805
     * @psalm-pure
9806
     *
9807
     * @return int
9808
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9809
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9810
     *             <strong>0</strong> if they are equal
9811
     */
9812 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // "UTF-8" and "CP850" are already in their canonical spelling
        $is_canonical_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_canonical_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            // other charsets go through the encoding-aware substr()
            $head_first = (string) self::substr($str1, 0, $len, $encoding);
            $head_second = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            // UTF-8: cut directly via mbstring
            $head_first = (string) \mb_substr($str1, 0, $len);
            $head_second = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($head_first, $head_second);
    }
9832
9833
    /**
9834
     * Search a string for any of a set of characters.
9835
     *
9836
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
9837
     *
9838
     * @see http://php.net/manual/en/function.strpbrk.php
9839
     *
9840
     * @param string $haystack  <p>The string where char_list is looked for.</p>
9841
     * @param string $char_list <p>This parameter is case-sensitive.</p>
9842
     *
9843
     * @psalm-pure
9844
     *
9845
     * @return false|string
9846
     *                      <p>The string starting from the character found, or false if it is not found.</p>
9847
     */
9848 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // build a character class out of $char_list and look for its first hit
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // cut the haystack at the byte position of the matched character
        $byte_offset = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $byte_offset);
    }
9860
9861
    /**
9862
     * Find the position of the first occurrence of a substring in a string.
9863
     *
9864
     * INFO: use UTF8::strpos_in_byte() for the byte position
9865
     *
9866
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
9867
     *
9868
     * @see http://php.net/manual/en/function.mb-strpos.php
9869
     *
9870
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
9871
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
9872
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
9873
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9874
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9875
     *
9876
     * @psalm-pure
9877
     *
9878
     * @return false|int
9879
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
9880
     *                   string.<br> If needle is not found it returns false.
9881
     */
9882 52
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // before PHP 8 nothing can be found in an empty haystack
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // PHP >= 8: an empty needle is found at position 0 of an empty haystack
        if ($haystack === '') {
            return $needle === '' ? 0 : false;
        }

        // before PHP 8 an empty needle is never found
        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack. Translate it into
        // an absolute character position first, otherwise the position found in the
        // tail below would be reported relative to the tail instead of the haystack.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack_tail = (string) $haystack_tmp;

        // byte position of the needle inside the tail
        $pos = \strpos($haystack_tail, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position and shift it by the offset
            return $offset + (int) self::strlen(\substr($haystack_tail, 0, $pos), $encoding);
        }

        // the needle starts right at the offset
        return $offset;
    }
10034
10035
    /**
10036
     * Find the position of the first occurrence of a substring in a string.
10037
     *
10038
     * @param string $haystack <p>
10039
     *                         The string being checked.
10040
     *                         </p>
10041
     * @param string $needle   <p>
10042
     *                         The string to search for in haystack.
10043
     *                         </p>
10044
     * @param int    $offset   [optional] <p>
10045
     *                         The search offset. If it is not specified, 0 is used.
10046
     *                         </p>
10047
     *
10048
     * @psalm-pure
10049
     *
10050
     * @return false|int
10051
     *                   <p>The numeric position of the first occurrence of needle in the
10052
     *                   haystack string. If needle is not found, it returns false.</p>
10053
     */
10054 2
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $has_empty_operand = $haystack === '' || $needle === '';
        if ($has_empty_operand) {
            return false;
        }

        // with "mbstring.func_overload" active, "mb_" is available, so search
        // via an 8-bit charset; otherwise the native byte search is used
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
10067
10068
    /**
10069
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10070
     *
10071
     * @param string $haystack <p>
10072
     *                         The string being checked.
10073
     *                         </p>
10074
     * @param string $needle   <p>
10075
     *                         The string to search for in haystack.
10076
     *                         </p>
10077
     * @param int    $offset   [optional] <p>
10078
     *                         The search offset. If it is not specified, 0 is used.
10079
     *                         </p>
10080
     *
10081
     * @psalm-pure
10082
     *
10083
     * @return false|int
10084
     *                   <p>The numeric position of the first occurrence of needle in the
10085
     *                   haystack string. If needle is not found, it returns false.</p>
10086
     */
10087 2
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $has_empty_operand = $haystack === '' || $needle === '';
        if ($has_empty_operand) {
            return false;
        }

        // with "mbstring.func_overload" active, "mb_" is available, so search
        // via an 8-bit charset; otherwise the native byte search is used
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \stripos($haystack, $needle, $offset);
    }
10100
10101
    /**
10102
     * Find the last occurrence of a character in a string within another.
10103
     *
10104
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
10105
     *
10106
     * @see http://php.net/manual/en/function.mb-strrchr.php
10107
     *
10108
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10109
     * @param string $needle        <p>The string to find in haystack</p>
10110
     * @param bool   $before_needle [optional] <p>
10111
     *                              Determines which portion of haystack
10112
     *                              this function returns.
10113
     *                              If set to true, it returns all of haystack
10114
     *                              from the beginning to the last occurrence of needle.
10115
     *                              If set to false, it returns all of haystack
10116
     *                              from the last occurrence of needle to the end,
10117
     *                              </p>
10118
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10119
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10120
     *
10121
     * @psalm-pure
10122
     *
10123
     * @return false|string
10124
     *                      <p>The portion of haystack or false if needle is not found.</p>
10125
     */
10126 2
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "UTF-8" and "CP850" are already in their canonical spelling
        $is_canonical_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_canonical_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // invalid characters in front of the needle would shift the
            // position reported by the "mb_" / "iconv_" functions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only (used only when the part *after* the needle is requested)
        //

        $is_single_byte_encoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte_encoding && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) iconv, or 4) vanilla php: both search for the first character of the needle only
        //
        // NOTE(review): the mbstring branch above passes the whole needle, these
        // fallbacks truncate it to one character - confirm this difference is intended.
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $last_pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $first_char, $encoding)
            : self::strrpos($haystack, $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        // either everything in front of the last occurrence, or everything from it to the end
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10231
10232
    /**
10233
     * Reverses characters order in the string.
10234
     *
10235
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
10236
     *
10237
     * @param string $str      <p>The input string.</p>
10238
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10239
     *
10240
     * @psalm-pure
10241
     *
10242
     * @return string
10243
     *                <p>The string with characters in the reverse sequence.</p>
10244
     */
10245 10
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // emoji are encoded before the reversal and decoded again afterwards
        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // walk the string from its last character down to the first one
            for ($i = (int) self::strlen($str, $encoding) - 1; $i >= 0; --$i) {
                $char = self::substr($str, $i, 1, $encoding);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($i = (int) \grapheme_strlen($str) - 1; $i >= 0; --$i) {
                $char = \grapheme_substr($str, $i, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } else {
            for ($i = (int) \mb_strlen($str) - 1; $i >= 0; --$i) {
                $char = \mb_substr($str, $i, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
10289
10290
    /**
10291
     * Find the last occurrence of a character in a string within another, case-insensitive.
10292
     *
10293
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
10294
     *
10295
     * @see http://php.net/manual/en/function.mb-strrichr.php
10296
     *
10297
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10298
     * @param string $needle        <p>The string to find in haystack.</p>
10299
     * @param bool   $before_needle [optional] <p>
10300
     *                              Determines which portion of haystack
10301
     *                              this function returns.
10302
     *                              If set to true, it returns all of haystack
10303
     *                              from the beginning to the last occurrence of needle.
10304
     *                              If set to false, it returns all of haystack
10305
     *                              from the last occurrence of needle to the end,
10306
     *                              </p>
10307
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10308
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10309
     *
10310
     * @psalm-pure
10311
     *
10312
     * @return false|string
10313
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
10314
     */
10315 3
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "UTF-8" and "CP850" are already in their canonical spelling
        $is_canonical_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_canonical_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // invalid characters in front of the needle would shift the
            // position reported by the "mb_" / "iconv_" functions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) vanilla php: searches for the first character of the needle only
        //
        // NOTE(review): the mbstring branch above passes the whole needle, this
        // fallback truncates it to one character - confirm this difference is intended.
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        // either everything in front of the last occurrence, or everything from it to the end
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10370
10371
    /**
     * Case-insensitive search for the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // early exit for an empty haystack (before the needle is normalized)
        if ($haystack === '') {
            if (!$is_php8) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // same check again, now with the needle as string
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        // an empty needle is only accepted since PHP 8
        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset
        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $intl_result = \grapheme_strripos($haystack, $needle, $offset);
            if ($intl_result !== false) {
                return $intl_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings, then do a case-sensitive search
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
10502
10503
    /**
     * Byte-wise, case-insensitive search for the position of the last occurrence of a string within another.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (or if one of the strings is empty).</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strripos($haystack, $needle, $offset);
    }
10537
10538
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // early exit for an empty haystack (before the needle is normalized)
        if ($haystack === '') {
            if (!$is_php8) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // same check again, now with the needle as string
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        // an empty needle is only accepted since PHP 8
        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strrpos()" can't handle a negative offset or other encodings
        if ($offset >= 0 && $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $intl_result = \grapheme_strrpos($haystack, $needle, $offset);
            if ($intl_result !== false) {
                return $intl_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // shrink the haystack to the part that is selected by the offset
        if ($offset !== 0) {
            if ($offset > 0) {
                $shortened = self::substr($haystack, $offset);
            } else {
                $shortened = self::substr($haystack, 0, $offset);
                // the shortened haystack still starts at position 0
                $offset = 0;
            }

            if ($shortened !== null) {
                $haystack = $shortened === false ? '' : (string) $shortened;
            }
        }

        // byte position of the last match ...
        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        // ... converted into a character position via the length of the prefix
        return $offset + (int) self::strlen($prefix);
    }
10697
10698
    /**
     * Byte-wise search for the position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found (or if one of the strings is empty), it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strrpos($haystack, $needle, $offset);
    }
10732
10733
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start position of the examined part of the input string.</p>
     * @param int|null $length   [optional] <p>Length of the examined part of the input string.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The length of the matching initial segment, 0 if nothing matches or if
     *                   the (sliced) string or the mask is empty.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // cut out the requested part of the string first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the leading run of chars from the mask
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
10780
10781
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            // an empty needle in an empty haystack is only a match since PHP 8
            return ($is_php8 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            // an empty needle matches at the beginning since PHP 8
            return $is_php8 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $intl_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($intl_result !== false) {
                return $intl_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        $match = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        $prefix = $match[1] ?? null;
        if ($prefix === null) {
            return false;
        }

        if ($before_needle) {
            return $prefix;
        }

        // skip the prefix, so that the result starts with the needle
        return self::substr($haystack, (int) self::strlen($prefix));
    }
10914
10915
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found (or if one of the strings is empty).</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850') // 8-BIT
            : \strstr($haystack, $needle, $before_needle);
    }
10956
10957
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the string is folded
            $str = self::clean($str);
        }

        $folded = self::fixStrCaseHelper($str, $lower, $full);

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return $lower ? \mb_strtolower($folded) : \mb_strtoupper($folded);
        }

        // otherwise delegate to the encoding / language aware methods
        return $lower
            ? self::strtolower($folded, $encoding, false, $lang)
            : self::strtoupper($folded, $encoding, false, $lang);
    }
11013
11014
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy load the list of available transliterators
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Lower';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . /** @scrutinizer ignore-type */ \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Lower';
                }

                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }
11093
11094
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy load the list of available transliterators
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // INFO: intl is available in this branch, only the requested language is unknown,
                    //       so the warning names the unsupported language (same wording as in strtolower())
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . /** @scrutinizer ignore-type */ \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11173
11174
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The characters (or a "search => replace" map, if "to" is empty) to translate
     *                              from.</p>
     * @param string|string[] $to   [optional] <p>The characters to translate to. If "from" and "to" differ in size,
     *                              the longer one is cut to the size of the shorter one.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into a translation map
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // both lists need the same size: cut the longer one
            $count_from = \count($from);
            $count_to = \count($to);

            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep "$from" untouched until we know that the combination worked,
            // otherwise the exception message can only show an empty "from" value
            $pairs = \array_combine($from, $to);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $pairs;
        }

        // only reachable with an empty "to": the sub-string "from" is replaced by ''
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        return \strtr($str, $from);
    }
11240
11241
    /**
     * Return the width of a string.
     *
     * INFO: wide characters count twice, so this is not the length of the string; see UTF8::strlen() for the latter
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        $is_shortcut_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_shortcut_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        // preferred way: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        // vanilla php: the pattern below works on UTF-8 only
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and remember how many of them were removed
        $wide_chars_pattern = '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u';
        $wide_count = 0;
        $narrow_part = (string) \preg_replace($wide_chars_pattern, '', $str, -1, $wide_count);

        // every wide character counts twice, the remaining characters once
        return (2 * $wide_count) + (int) self::strlen($narrow_part);
    }
11300
11301
    /**
     * Get part of a string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p><b>FALSE</b> is returned if the length of the string
     *                      cannot be determined in the fallback code path.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        //
        // mbstring with an explicit (non UTF-8) encoding
        //
        // Without this branch the requested encoding was never handed to
        // "mb_substr()", so the string was cut as if it were UTF-8 by the
        // fallbacks below, even though mbstring can handle the encoding.
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_substr($str, $offset, $length, $encoding);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string ("$length === 0" was already handled at the top,
        // so only "null" is left as "no length given")
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        $length = $length ?? $str_length;

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        // &&
        // extract relevant part, and join to make sting again
        return \implode('', \array_slice(self::str_split($str), $offset, $length));
    }
11459
11460
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // only cut the strings if a part of them was requested
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // "$str2" is compared up to the length of the extracted part of "$str1"
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // measure "$str1" in the same encoding that is used to cut "$str2",
                // otherwise the two parts can end up with a different number of characters
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
11521
11522
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. A negative length is handed to "mb_substr()" as it is.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>This functions returns an integer or false if there isn't a string.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // nothing to search in
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);

            // the cleanup may have removed the whole needle
            if ($needle === '') {
                return false;
            }
        }

        // "$length === 0" was handled above, so every other non-null length
        // (positive or negative) must limit the haystack, also with offset 0
        if ($offset || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via regex: count all matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
11619
11620
    /**
     * Count the number of substring occurrences.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // with "mbstring.func_overload" the haystack is cut by hand before counting
        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                // output from "substr_count()" have changed in PHP 7.1
                // (PHP_VERSION_ID of 7.1.0 is 70100: major * 10000 + minor * 100 + release)
                \PHP_VERSION_ID < 70100
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
11701
11702
    /**
     * Count how often $substring occurs in $str.
     *
     * The comparison is case-sensitive by default; pass false as $case_sensitive
     * to ignore the case.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                // bring both strings into the same case before counting
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        if (!$case_sensitive) {
            // bring both strings into the same case before counting
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
11749
11750
    /**
     * Strip the prefix $needle from the start of $haystack, ignoring the case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
11782
11783
    /**
     * Get part of a string, where offset and length are counted in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, as returned by the native "substr()" / "mb_substr()"
     *                      call.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing can be extracted
        if ($length === 0 || $str === '') {
            return '';
        }

        // no restriction at all: the whole string is the result
        if ($length === null && !$offset) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // no length given: fall back to a fixed upper bound
            $byte_length = $length ?? 2147483647;

            return \substr($str, $offset, $byte_length);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }
11816
11817
    /**
     * Strip the suffix $needle from the end of $haystack, ignoring the case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
11849
11850
    /**
     * Strip the prefix $needle from the start of $haystack (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
11882
11883
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     <br><br>
     *                                     If an array is given that is shorter than $str, the missing offsets
     *                                     are treated as 0.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given and $str is a string, the replacing
     *                                     ends at the end of string. If it is not given and $str is an array,
     *                                     a length of zero is used for every element, i.e. the replacement is
     *                                     inserted at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     *
     * @template TSubstrReplace
     * @phpstan-param TSubstrReplace $str
     * @phpstan-return TSubstrReplace
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // the replacement: one entry per input string
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries fall back to 0
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);

                // A too-short offset array would hand "null" to the scalar path, which
                // then returns "string + replacement + string"; use 0 for the rest,
                // like the native substr_replace() does.
                $offset = \array_pad($offset, $num, 0);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: "null" means "insert" (length 0) for array input
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call per element; the closure keeps the requested encoding,
            // which a plain callable reference would silently reset to the default
            return \array_map(
                static function ($str_item, $replacement_item, $offset_item, $length_item) use ($encoding) {
                    return self::substr_replace(
                        $str_item,
                        $replacement_item,
                        $offset_item,
                        $length_item,
                        $encoding
                    );
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a replacement array for a single string: only the first entry is used
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, string length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, a missing one means "to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace beyond the end of the string
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters and splice the arrays
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12041
12042
    /**
     * Strips a suffix ($needle) from the end of the string ($haystack), if it is present.
     *
     * The suffix check is a case-sensitive byte comparison.
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // Empty haystack or empty suffix: there is nothing to remove.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // Compare the trailing bytes of the haystack with the suffix.
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            // fast path: plain "mb_" functions
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        // any other charset goes through the encoding-aware helpers
        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
12091
12092
    /**
     * Returns a case swapped version of the string.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // "lower XOR upper XOR original" swaps the case byte by byte, but the result
        // is cut to the shortest operand and only meaningful while all three strings
        // line up. Differing byte lengths (e.g. "ı" => "I") prove they do not.
        $byte_length = \strlen($str);
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            return (string) ($lower ^ $upper ^ $str);
        }

        //
        // fallback: swap the case character by character
        //

        $encoding_tmp = $encoding === 'UTF-8' ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

        // linear-time split for UTF-8; "false" means the input was not valid UTF-8
        $chars = $encoding_tmp === 'UTF-8' ? \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY) : false;
        if ($chars === false) {
            $chars = [];
            $char_count = (int) \mb_strlen($str, $encoding_tmp);
            for ($i = 0; $i < $char_count; ++$i) {
                $chars[] = (string) \mb_substr($str, $i, 1, $encoding_tmp);
            }
        }

        $swapped = '';
        foreach ($chars as $char) {
            // a character that changes when lowercased was uppercase, everything
            // else is uppercased (caseless characters stay as they are)
            $char_lower = \mb_strtolower($char, $encoding_tmp);
            $swapped .= $char_lower !== $char ? $char_lower : \mb_strtoupper($char, $encoding_tmp);
        }

        return $swapped;
    }
12124
12125
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when a function of an extension exists ("mb_strlen" / "iconv")
     * although the extension itself ("mbstring" / "iconv") is not loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = \function_exists('mb_strlen') && !\extension_loaded('mbstring');
        $iconv_is_polyfilled = \function_exists('iconv') && !\extension_loaded('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12152
12153
    /**
     * Replaces every tab character in the string with a run of spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with all tabs converted to spaces.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // one tab becomes "$tab_length" spaces
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12173
12174
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without "$try_to_keep_the_string_length" the work is done by
     * "mb_convert_case()", otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // language specific rules or length preservation need the full-blown helper
        $needs_special_handling = $lang !== null || $try_to_keep_the_string_length;
        if ($needs_special_handling) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
12228
12229
    /**
     * Convert a string into ASCII.
     *
     * This is a convenience wrapper around "ASCII::to_transliterate()".
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The transliterated string.</p>
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
12250
12251
    /**
     * Interprets the given value as a boolean.
     *
     * Rules, in this order:
     * - an empty or whitespace-only string is false
     * - "true", "1", "on", "yes" are true and "false", "0", "off", "no" are false
     *   (case-insensitive, surrounding whitespace is ignored)
     * - a numeric string is true if it is greater than zero
     * - anything else is true
     *
     * @param bool|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // fast path: exact keyword
        if (isset($map[$str])) {
            return $map[$str];
        }

        // Normalize case AND surrounding whitespace before the keyword lookup,
        // otherwise a padded " off " or " false " would count as true.
        $key = \strtolower(\trim($str));

        if ($key === '') {
            return false;
        }

        if (isset($map[$key])) {
            return $map[$key];
        }

        if (\is_numeric($key)) {
            return ((float) $key) > 0;
        }

        // any other non-empty text ("0" was already handled by the map)
        return true;
    }
12294
12295
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * This is a convenience wrapper around "ASCII::to_filename()".
     *
     * @param string $str               <p>The input string.</p>
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char     <p>Passed through to "ASCII::to_filename()"; defaults to a hyphen.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(string $str, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
12318
12319
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), array keys are kept.
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @template TToIso8859
     * @phpstan-param TToIso8859 $str
     * @phpstan-return TToIso8859
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            // with a single input array "array_map()" preserves the keys
            return \array_map([self::class, 'to_iso8859'], $str);
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
12351
12352
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * Every element of an array is converted via "UTF8::to_utf8_string()", array keys are kept.
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @phpstan-param TToUtf8 $str
     * @phpstan-return TToUtf8
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $converted */
            $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $converted;
        }

        $converted = [];
        foreach ($str as $key => $value) {
            $converted[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        /** @phpstan-var TToUtf8 $converted */
        return $converted;
    }
12392
12393
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string</p>
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $max = \strlen($str);
        $buf = '';

        // Walk the string byte by byte: sequences that look like well-formed UTF-8
        // are copied, every other byte >= 0x80 is converted via the helper.
        for ($i = 0; $i < $max; ++$i) {
            $c1 = $str[$i];

            if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

                if ($c1 <= "\xDF") { // looks like 2 bytes UTF8

                    // a missing follow-up byte is read as "\x00", which fails the range check
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];

                    if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2;
                        ++$i;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];

                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2 . $c3;
                        $i += 2;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
                    $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2 . $c3 . $c4;
                        $i += 3;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } else { // doesn't look like UTF8, but should be converted

                    $buf .= self::to_utf8_convert_helper($c1);
                }
            } elseif (($c1 & "\xC0") === "\x80") { // needs conversion

                $buf .= self::to_utf8_convert_helper($c1);
            } else { // it doesn't need conversion

                $buf .= $c1;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $cp = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair
                    // http://unicode.org/faq/utf_bom.html#utf16-4
                    $cp = ((int) \hexdec($matches[1]) << 10)
                          + (int) \hexdec($matches[2])
                          + 0x10000
                          - (0xD800 << 10)
                          - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($cp < 0x80) {
                    return (string) self::chr($cp);
                }

                if ($cp < 0xA0) {
                    // Build the two raw UTF-8 bytes of the code point with the native
                    // byte-level "chr()". The class helper "self::chr()" takes a code
                    // point (see the branch above), so feeding it the lead / trail
                    // byte values would encode each of them as a character of its own.
                    return \chr(0xC0 | ($cp >> 6)) . \chr(0x80 | ($cp & 0x3F));
                }

                return self::decimal_to_chr($cp);
            },
            $buf
        );

        // "preg_replace_callback()" returns null on a regex error
        if ($buf === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
12524
12525
    /**
     * Returns the given string as an integer, or null if the string isn't numeric.
     *
     * "Numeric" is decided by "is_numeric()", the conversion is a plain integer cast.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>null if the string isn't numeric</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
12543
12544
    /**
     * Returns the given input as string, or null if the input isn't int|float|string
     * and does not implement the "__toString()" method.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        // scalars that have an unambiguous string form (booleans are not among them)
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects only when they know how to turn themselves into a string
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // null, bool, array, resource, object without "__toString()"
        return null;
    }
12583
12584
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null strips whitespace,
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list strips nothing (same as the native "trim()").
        // It must not reach the pattern below: "[]" is not a usable bracket expression.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // no mbstring: same pattern, applied through the regex helper of this class
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
12629
12630
    /**
     * Makes string's first char uppercase.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with the first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // the plain "mb_" functions are enough, as long as no special rules were requested
        $plain_mb_case = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $tail = (string) \mb_substr($str, 1);
            $head = (string) \mb_substr($str, 0, 1);

            $head = $plain_mb_case
                ? \mb_strtoupper($head)
                : self::strtoupper($head, $encoding, false, $lang, $try_to_keep_the_string_length);

            return $head . $tail;
        }

        // every other charset: normalize the name first, then split via the encoding-aware helpers
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($plain_mb_case) {
            $head = \mb_strtoupper((string) \mb_substr($str, 0, 1, $encoding), $encoding);
        } else {
            $head = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $head . $tail;
    }
12706
12707
    /**
     * Uppercase the first character of every word in the string.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that are copied to the result unchanged.</p>
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Compare with '' explicitly: a loose "!$str" check would also throw away the valid input "0".
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE) is not used on purpose,
        // -> MB_CASE_TITLE does not only uppercase the first letter, it also lowercases all other letters

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // The native function is only usable when no extra word chars and no exceptions were given.
        // Checked against '' (not by truthiness) so that a $char_list of "0" is not silently ignored.
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // Skip only real empty segments; a segment "0" must stay in the result.
            if ($word === '') {
                continue;
            }

            if (
                $use_exceptions
                &&
                \in_array($word, $exceptions, true)
            ) {
                $words_str .= $word;
            } else {
                $words_str .= self::ucfirst($word, $encoding);
            }
        }

        return $words_str;
    }
12775
12776
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible (until the string no longer changes).</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $decoded = self::urldecode_unicode_helper($str);

        // One pass is always executed; further passes only run in multi-decode mode
        // and stop as soon as a pass leaves the string unchanged.
        do {
            $previous = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($previous),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $previous !== $decoded);

        return self::fix_simple_utf8($decoded);
    }
12835
12836
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences whose code point is below 256 are converted to the matching single byte;
     * every other multi-byte sequence (and a two-byte sequence that is cut off at the end of the
     * input) is replaced by "?".
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>
     *                                If true, the untouched input is returned whenever the decoded
     *                                result is not shorter (measured with self::strlen()) than the input.
     *                                </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';

        // The string is rewritten in place: $i is the read position, $j the write position ($j <= $i).
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the current byte tells how long the sequence is
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // 2-byte sequence
                    if ($i + 1 >= $len) {
                        // Truncated sequence at the end of the input: there is no second byte to read,
                        // so emit the replacement char instead of reading past the end of the string.
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: not representable in ISO-8859-1
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // every other byte is copied unchanged
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12908
12909
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the conversion failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // "utf8_encode()" is deprecated as of PHP 8.2, "mb_convert_encoding()" does the
        // same ISO-8859-1 -> UTF-8 conversion without triggering a deprecation notice.
        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

        if (!\is_string($encoded)) {
            return '';
        }

        return $encoded;
    }
12935
12936
    /**
     * Get the table of all known utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  The whitespace characters as values and the type of whitespace as keys,
     *                  as defined in the URL above
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12951
12952
    /**
     * Cut a string after a maximum number of words.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The maximum number of words; values below 1 result in an empty string.</p>
     * @param string $str_add_on <p>Appended to the result when the string was shortened.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace + up to $limit runs of "non-whitespace followed by whitespace"
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        // nothing matched -> hand back the input as it is
        if (!isset($found[0])) {
            return $str;
        }

        $head = $found[0];

        // the match already covers the whole input -> nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
12986
12987
    /**
     * Wraps a string to a given number of characters.
     *
     * The native "wordwrap()" is not run on the text itself but on a one-byte-per-character
     * mask of it (" " for spaces, "?" for any other character, "#" for a break); the break
     * positions found in the mask are then applied to the real characters.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The given string wrapped at the specified column,
     *                or an empty string if $str or $break is empty.
     *                </p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach (\explode($break, $str) as $segment_index => $segment) {
            // every segment but the first one was preceded by an existing break
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;
        $mask_index = -1;
        $break_pos = -1;

        /** @noinspection PhpAssignmentInConditionInspection - is ok here */
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy the characters in front of the current break position
            for (++$mask_index; $mask_index < $break_pos; ++$mask_index) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // the break takes the place of an existing break or of a space, so drop that element
            if (
                $chars[$char_index] === $break
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed above belongs to the last line
        return $wrapped . \implode('', $chars);
    }
13080
    /**
13081
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
13082
     *    ... so that we wrap the per line.
13083
     *
13084
     * @param string      $str             <p>The input string.</p>
13085
     * @param int         $width           [optional] <p>The column width.</p>
13086
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
13087
     * @param bool        $cut             [optional] <p>
13088
     *                                     If the cut is set to true, the string is
13089
     *                                     always wrapped at or before the specified width. So if you have
13090
     *                                     a word that is larger than the given width, it is broken apart.
13091
     *                                     </p>
13092
     * @param bool        $add_final_break [optional] <p>
13093
     *                                     If this flag is true, then the method will add a $break at the end
13094
     *                                     of the result string.
13095
     *                                     </p>
13096
     * @param string|null $delimiter       [optional] <p>
13097
     *                                     You can change the default behavior, where we split the string by newline.
13098
     *                                     </p>
13099
     *
13100
     * @psalm-pure
13101
     *
13102
     * @return string
13103
     */
13104 1
    public static function wordwrap_per_line(
13105
        string $str,
13106
        int $width = 75,
13107
        string $break = "\n",
13108
        bool $cut = false,
13109
        bool $add_final_break = true,
13110
        string $delimiter = null
13111
    ): string {
13112 1
        if ($delimiter === null) {
13113 1
            $strings = \preg_split('/\\r\\n|\\r|\\n/', $str);
13114
        } else {
13115 1
            $strings = \explode($delimiter, $str);
13116
        }
13117
13118 1
        $string_helper_array = [];
13119 1
        if ($strings !== false) {
13120 1
            foreach ($strings as $value) {
13121 1
                $string_helper_array[] = self::wordwrap($value, $width, $break, $cut);
13122
            }
13123
        }
13124
13125 1
        if ($add_final_break) {
13126 1
            $final_break = $break;
13127
        } else {
13128 1
            $final_break = '';
13129
        }
13130
13131 1
        return \implode($delimiter ?? "\n", $string_helper_array) . $final_break;
13132
    }
13133
13134
    /**
13135
     * Returns an array of Unicode White Space characters.
13136
     *
13137
     * @psalm-pure
13138
     *
13139
     * @return string[]
13140
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
13141
     */
13142 2
    public static function ws(): array
13143
    {
13144 2
        return self::$WHITESPACE;
13145
    }
13146
13147
    /**
     * Convert an UTF-8 encoded string to a single-byte string suitable for
     * functions such as levenshtein.
     *
     * Every multibyte sequence that is not yet part of the map gets the next
     * free byte value (128 + number of map entries), in order of appearance;
     * afterwards all mapped sequences in the string are replaced by their byte.
     *
     * Thus it supports up to 128 different multibyte code points max over
     * the whole set of strings sharing this map.
     *
     * Source: https://github.com/KEINOS/mb_levenshtein
     *
     * @param  string $str  UTF-8 string to be converted to extended ASCII (changed in place).
     * @param  array  $map  Reference of the map (extended in place).
     *
     * @return void
     */
    private static function convertMbAscii(string &$str, array &$map)
    {
        // collect all multibyte sequences of the string
        $found = [];
        $has_multibyte = \preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $found);
        if (!$has_multibyte) {
            return; // plain ascii string
        }

        // register the sequences that were not met before
        $next_code = 128 + \count($map);
        foreach ($found[0] as $sequence) {
            if (isset($map[$sequence])) {
                continue;
            }

            $map[$sequence] = \chr($next_code);
            ++$next_code;
        }

        // finally remap non-ascii characters
        $str = \strtr($str, $map);
    }
13185
13186
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary data that is detected as UTF-16 / UTF-32 is never accepted
        if (
            $strict
            &&
            self::is_binary($str, true)
            &&
            (
                self::is_utf16($str, false) !== false
                ||
                self::is_utf32($str, false) !== false
            )
        ) {
            return false;
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        // Fallback: validate octet by octet.
        $pending = 0; // number of continuation octets that are still expected
        $code_point = 0; // code point that is assembled from the current sequence
        $sequence_length = 1; // total number of octets of the current sequence

        // [lead mask, lead pattern, payload mask, number of continuation octets],
        // checked in this order; 5 and 6 octet sequences are accepted here and
        // rejected as soon as their last octet was read
        $lead_octets = [
            [0xE0, 0xC0, 0x1F, 1],
            [0xF0, 0xE0, 0x0F, 2],
            [0xF8, 0xF0, 0x07, 3],
            [0xFC, 0xF8, 0x03, 4],
            [0xFE, 0xFC, 1, 5],
        ];

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            $octet = self::$ORD[$str[$i]];

            if ($pending === 0) {
                // Outside of a sequence we expect either a US-ASCII character
                // or the first octet of a multi-octet sequence.
                if ((0x80 & $octet) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;

                    continue;
                }

                $is_lead_octet = false;
                foreach ($lead_octets as list($lead_mask, $lead_pattern, $payload_mask, $continuations)) {
                    if (($lead_mask & $octet) === $lead_pattern) {
                        $code_point = ($octet & $payload_mask) << ($continuations * 6);
                        $pending = $continuations;
                        $sequence_length = $continuations + 1;
                        $is_lead_octet = true;

                        break;
                    }
                }

                if (!$is_lead_octet) {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            // Inside of a sequence only a continuation octet is legal,
            // anything else means the sequence is incomplete.
            if ((0xC0 & $octet) !== 0x80) {
                return false;
            }

            $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);

            if (--$pending !== 0) {
                continue;
            }

            // End of the multi-octet sequence: $code_point now holds the final code point,
            // check for illegal sequences and code points.
            if (
                // From Unicode 3.1, non-shortest form is illegal
                ($sequence_length === 2 && $code_point < 0x0080)
                ||
                ($sequence_length === 3 && $code_point < 0x0800)
                ||
                ($sequence_length === 4 && $code_point < 0x10000)
                ||
                ($sequence_length > 4)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($code_point & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                ($code_point > 0x10FFFF)
            ) {
                return false;
            }

            // reset for the next character
            $code_point = 0;
            $sequence_length = 1;
        }

        // the input must not end in the middle of a sequence
        return $pending === 0;
    }
13340
13341
    /**
     * Replace characters via the case-folding tables.
     *
     * The common table (self::$COMMON_CASE_FOLD) is always applied, the full table
     * (data file "caseFolding_full", loaded once and cached) only on request.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $use_lowercase
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is the search list for lowercasing, index 1 the matching replacement list
        if ($use_lowercase) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
13394
13395
    /**
     * Load "/data/<file>.php" (relative to this file) and return the value of the include.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
13413
13414
    /**
     * Build the emoji lookup caches on first use.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>True if the caches were built by this call, null if they already existed.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function initEmojiData()
    {
        // nothing to do if the caches were already built
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // order the table by key length, longest keys first

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // one placeholder per key, built from the crc32 checksum of the key and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $key) {
            $checksum = (string) \crc32($key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
13451
13452
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              True if the constant MB_OVERLOAD_STRING exists and its bit is set
     *              in the INI value "mbstring.func_overload".
     *              </p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection DeprecatedIniOptionsInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($func_overload & \MB_OVERLOAD_STRING);
    }
13472
13473
    /**
     * Filter a list of strings; the remaining values are re-indexed from 0.
     *
     * @param array    $strings
     * @param bool     $remove_empty_values <p>Drop values that are empty after "trim()".</p>
     * @param int|null $remove_short_values <p>Drop values with at most this many characters, null disables the check.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        $kept = [];

        foreach ($strings as $value) {
            $is_too_short = $remove_short_values !== null
                            &&
                            \mb_strlen($value) <= $remove_short_values;
            if ($is_too_short) {
                continue;
            }

            $is_blank = $remove_empty_values
                        &&
                        \trim($value) === '';
            if ($is_blank) {
                continue;
            }

            $kept[] = $value;
        }

        return $kept;
    }
13514
13515
    /**
     * Build (and cache) a regex fragment that matches the characters of $s.
     *
     * Single characters are collected, together with $class, in one bracket expression "[...]";
     * elements of more than one character are added as alternatives: "(?:[...]|xy)".
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial content of the bracket expression, used as given (not quoted).</p>
     *
     * @return string
     *
     * @psalm-pure
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 holds the content of the bracket expression, all further entries are alternatives
        $class_array = [$class];

        // Iterate by value and with an own variable: the result of a function call cannot
        // be a reference target and the parameter $s must not be overwritten.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal "-" has to be the first char of the bracket expression
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // less than 3 bytes: quote it for the use in a regex
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one character of 3 or more bytes
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
13570
13571
    /**
     * Capitalize the parts of a personal name, e.g. "marcus aurelius" => "Marcus Aurelius".
     *
     * The input is split at $delimiter and every part is passed through self::ucfirst(),
     * except for:
     * - parts that are exactly one of the known lower-case name particles ("von", "de", "ibn", ...),
     * - parts that start with one of the known prefixes ("mc", "mac", "d'", ...),
     * - when the delimiter is "-": parts that start with one of the name particles.
     *
     * All comparisons are byte-wise and case-sensitive.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts, e.g. " " or "-".</p>
     * @param string $encoding  <p>The encoding that is passed on to self::ucfirst().</p>
     *
     * @psalm-pure
     *
     * @return string <p>The re-joined name, or an empty string if $delimiter is empty.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() rejects an empty delimiter (warning + false before PHP 8, ValueError since PHP 8),
        // so handle that case up front and keep the historical "empty string" result.
        if ($delimiter === '') {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a part untouched; for hyphenated names the particles count as well.
        $protected_beginnings = $delimiter === '-'
            ? \array_merge($special_cases['names'], $special_cases['prefixes'])
            : $special_cases['prefixes'];

        $name_parts = \explode($delimiter, $names);

        foreach ($name_parts as $index => $name) {
            // A stand-alone particle stays lower-case.
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $is_protected = false;
            foreach ($protected_beginnings as $beginning) {
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    $is_protected = true;

                    break;
                }
            }

            if ($is_protected) {
                continue;
            }

            $name_parts[$index] = self::ucfirst($name, $encoding);
        }

        return \implode($delimiter, $name_parts);
    }
13671
13672
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode normalization form D) and all
     * non-spacing marks (\p{Mn}, e.g. combining accents) are removed afterwards,
     * so that e.g. "é" and "e" end up as the same string.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string <p>The folded string, or an empty string if the normalization
     *                or the replacement failed (e.g. for invalid UTF-8 input).</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
        if ($decomposed === false) {
            return '';
        }

        // preg_replace() yields null on error; the cast maps that to '' so that
        // both failure paths of this method report the same value.
        return (string) \preg_replace(
            '/\p{Mn}+/u',
            '',
            $decomposed
        );
    }
13696
13697
    /**
     * Convert a single byte into its UTF-8 representation.
     *
     * Bytes that are listed in the Windows-1252 special-case table are replaced by the
     * table entry; every other byte is turned into a two-byte sequence
     * (lead byte: 0xC0 | (ord / 64), continuation byte: 0x80 | (byte & 0x3F)).
     *
     * The lookup tables are loaded lazily via self::getData() on first use.
     *
     * NOTE(review): the input is used as a key of the "ord" table, so it is presumably
     * expected to be exactly one byte - confirm against the callers.
     *
     * @param int|string $input <p>The byte to convert.</p>
     *
     * @psalm-pure
     *
     * @return string <p>The converted byte sequence.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // "/" yields a float for most values; truncate explicitly instead of relying on the
            // implicit float-to-int conversion of array offsets (deprecated since PHP 8.1).
            $lead_index = (int) ($ordC1 / 64);

            // Bitwise operators on strings work byte-wise here, this is intended.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[$lead_index] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
13735
13736
    /**
     * Rewrite non-standard "%uXXXX" url escapes (3-4 hex digits) as hexadecimal
     * HTML entities ("&#xXXXX;").
     *
     * Strings without such an escape are returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string <p>The string with every matching escape replaced.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $unicode_escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        // Cheap substring test first, the regex only runs for candidates.
        if (
            \strpos($str, '%u') === false
            ||
            !\preg_match($unicode_escape_rx, $str)
        ) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape_rx, '&#x\\1;', $str);
    }
13758
}
13759